diff --git a/mk/grammar.mk b/mk/grammar.mk index 12190fb034854..a9f45907b8110 100644 --- a/mk/grammar.mk +++ b/mk/grammar.mk @@ -30,17 +30,25 @@ endef $(BG): $(Q)mkdir -p $(BG) -$(BG)RustLexer.class: $(SG)RustLexer.g4 +$(BG)RustLexer.class: $(BG) $(SG)RustLexer.g4 $(Q)$(CFG_ANTLR4) -o $(B)grammar $(SG)RustLexer.g4 $(Q)$(CFG_JAVAC) -d $(BG) $(BG)RustLexer.java -$(BG)verify: $(SG)verify.rs rustc-stage2-H-$(CFG_BUILD) $(LD)stamp.regex_macros $(LD)stamp.rustc - $(Q)$(RUSTC) -O --out-dir $(BG) -L $(L) $(SG)verify.rs +check-build-lexer-verifier: $(BG)verify + +ifeq ($(NO_REBUILD),) +VERIFY_DEPS := rustc-stage2-H-$(CFG_BUILD) $(LD)stamp.regex_macros $(LD)stamp.rustc +else +VERIFY_DEPS := +endif + +$(BG)verify: $(BG) $(SG)verify.rs $(VERIFY_DEPS) + $(Q)$(RUSTC) --out-dir $(BG) -L $(L) $(SG)verify.rs ifdef CFG_JAVAC ifdef CFG_ANTLR4 ifdef CFG_GRUN -check-lexer: $(BG) $(BG)RustLexer.class $(BG)verify +check-lexer: $(BG) $(BG)RustLexer.class check-build-lexer-verifier $(info Verifying libsyntax against the reference lexer ...) $(Q)$(SG)check.sh $(S) "$(BG)" \ "$(CFG_GRUN)" "$(BG)verify" "$(BG)RustLexer.tokens" diff --git a/mk/tests.mk b/mk/tests.mk index 4433d780dedf6..63a34e0f01007 100644 --- a/mk/tests.mk +++ b/mk/tests.mk @@ -199,7 +199,7 @@ check-docs: cleantestlibs cleantmptestlogs check-stage2-docs # Some less critical tests that are not prone to breakage. # Not run as part of the normal test suite, but tested by bors on checkin. -check-secondary: check-build-compiletest check-lexer check-pretty +check-secondary: check-build-compiletest check-build-lexer-verifier check-lexer check-pretty # check + check-secondary. # diff --git a/src/doc/reference.md b/src/doc/reference.md index 62e0f5e4f1f48..0b4a745f98716 100644 --- a/src/doc/reference.md +++ b/src/doc/reference.md @@ -216,9 +216,15 @@ rather than referring to it by name or some other evaluation rule. A literal is a form of constant expression, so is evaluated (primarily) at compile time. 
```{.ebnf .gram} -literal : string_lit | char_lit | byte_string_lit | byte_lit | num_lit ; +lit_suffix : ident; +literal : [ string_lit | char_lit | byte_string_lit | byte_lit | num_lit ] lit_suffix ?; ``` +The optional suffix is only used for certain numeric literals, but is +reserved for future extension. That is, the above gives the lexical +grammar, but a Rust parser will reject every suffix except the 12 special +cases mentioned in [Number literals](#number-literals) below. + #### Character and string literals ```{.ebnf .gram} @@ -371,27 +377,20 @@ b"\\x52"; br"\x52"; // \x52 #### Number literals ```{.ebnf .gram} -num_lit : nonzero_dec [ dec_digit | '_' ] * num_suffix ? - | '0' [ [ dec_digit | '_' ] * num_suffix ? - | 'b' [ '1' | '0' | '_' ] + int_suffix ? - | 'o' [ oct_digit | '_' ] + int_suffix ? - | 'x' [ hex_digit | '_' ] + int_suffix ? ] ; - -num_suffix : int_suffix | float_suffix ; +num_lit : nonzero_dec [ dec_digit | '_' ] * float_suffix ? + | '0' [ [ dec_digit | '_' ] * float_suffix ? + | 'b' [ '1' | '0' | '_' ] + + | 'o' [ oct_digit | '_' ] + + | 'x' [ hex_digit | '_' ] + ] ; -int_suffix : 'u' int_suffix_size ? - | 'i' int_suffix_size ? ; -int_suffix_size : [ '8' | "16" | "32" | "64" ] ; +float_suffix : [ exponent | '.' dec_lit exponent ? ] ? ; -float_suffix : [ exponent | '.' dec_lit exponent ? ] ? float_suffix_ty ? ; -float_suffix_ty : 'f' [ "32" | "64" ] ; exponent : ['E' | 'e'] ['-' | '+' ] ? dec_lit ; dec_lit : [ dec_digit | '_' ] + ; ``` A _number literal_ is either an _integer literal_ or a _floating-point -literal_. The grammar for recognizing the two kinds of literals is mixed, as -they are differentiated by suffixes. +literal_. The grammar for recognizing the two kinds of literals is mixed. ##### Integer literals @@ -406,9 +405,9 @@ An _integer literal_ has one of four forms: * A _binary literal_ starts with the character sequence `U+0030` `U+0062` (`0b`) and continues as any mixture of binary digits and underscores. 
-An integer literal may be followed (immediately, without any spaces) by an -_integer suffix_, which changes the type of the literal. There are two kinds of -integer literal suffix: +Like any literal, an integer literal may be followed (immediately, +without any spaces) by an _integer suffix_, which forcibly sets the +type of the literal. There are 10 valid values for an integer suffix: * The `i` and `u` suffixes give the literal type `int` or `uint`, respectively. @@ -443,11 +442,9 @@ A _floating-point literal_ has one of two forms: * A single _decimal literal_ followed by an _exponent_. By default, a floating-point literal has a generic type, and, like integer -literals, the type must be uniquely determined from the context. A -floating-point literal may be followed (immediately, without any spaces) by a -_floating-point suffix_, which changes the type of the literal. There are two -floating-point suffixes: `f32`, and `f64` (the 32-bit and 64-bit floating point -types). +literals, the type must be uniquely determined from the context. There are two valid +_floating-point suffixes_, `f32` and `f64` (the 32-bit and 64-bit floating point +types), which explicitly determine the type of the literal. Examples of floating-point literals of various forms: diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4 index e8165dabce5cd..0ff9af7aca133 100644 --- a/src/grammar/RustLexer.g4 +++ b/src/grammar/RustLexer.g4 @@ -92,49 +92,35 @@ fragment CHAR_ESCAPE | 'U' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT ; -LIT_CHAR - : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] ) '\'' +fragment SUFFIX + : IDENT ; -LIT_BYTE - : 'b\'' ( '\\' ( [xX] HEXIT HEXIT | [nrt\\'"0] ) | ~[\\'\n\t\r] ) '\'' +LIT_CHAR + : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] ) '\'' SUFFIX? ; -fragment INT_SUFFIX - : 'i' - | 'i8' - | 'i16' - | 'i32' - | 'i64' - | 'u' - | 'u8' - | 'u16' - | 'u32' - | 'u64' +LIT_BYTE + : 'b\'' ( '\\' ( [xX] HEXIT HEXIT | [nrt\\'"0] ) | ~[\\'\n\t\r] ) '\'' SUFFIX? 
; LIT_INTEGER - : [0-9][0-9_]* INT_SUFFIX? - | '0b' [01][01_]* INT_SUFFIX? - | '0o' [0-7][0-7_]* INT_SUFFIX? - | '0x' [0-9a-fA-F][0-9a-fA-F_]* INT_SUFFIX? - ; - -fragment FLOAT_SUFFIX - : 'f32' - | 'f64' + : [0-9][0-9_]* SUFFIX? + | '0b' [01][01_]* SUFFIX? + | '0o' [0-7][0-7_]* SUFFIX? + | '0x' [0-9a-fA-F][0-9a-fA-F_]* SUFFIX? ; LIT_FLOAT - : [0-9][0-9_]* ('.' | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? FLOAT_SUFFIX?) + : [0-9][0-9_]* ('.' | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX?) ; LIT_STR - : '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? '"' + : '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? '"' SUFFIX? ; -LIT_BINARY : 'b' LIT_STR ; -LIT_BINARY_RAW : 'rb' LIT_STR_RAW ; +LIT_BINARY : 'b' LIT_STR SUFFIX?; +LIT_BINARY_RAW : 'rb' LIT_STR_RAW SUFFIX?; /* this is a bit messy */ @@ -148,7 +134,7 @@ fragment LIT_STR_RAW_INNER2 ; LIT_STR_RAW - : 'r' LIT_STR_RAW_INNER + : 'r' LIT_STR_RAW_INNER SUFFIX? ; IDENT : XID_start XID_continue* ; diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs index 159a62f011072..e3ff20f7874bf 100644 --- a/src/grammar/verify.rs +++ b/src/grammar/verify.rs @@ -26,21 +26,21 @@ use std::io::File; use syntax::parse; use syntax::parse::lexer; -use rustc::driver::{session, config}; +use rustc::session::{mod, config}; use syntax::ast; use syntax::ast::Name; use syntax::parse::token; use syntax::parse::lexer::TokenAndSpan; -fn parse_token_list(file: &str) -> HashMap { - fn id() -> Token { +fn parse_token_list(file: &str) -> HashMap { + fn id() -> token::Token { token::Ident(ast::Ident { name: Name(0), ctxt: 0, }, token::Plain) } let mut res = HashMap::new(); - res.insert("-1".to_string(), EOF); + res.insert("-1".to_string(), token::Eof); for line in file.split('\n') { let eq = match line.trim().rfind('=') { @@ -60,8 +60,8 @@ fn parse_token_list(file: &str) -> HashMap { "INT_SUFFIX" => id(), "SHL" => token::BinOp(token::Shl), "LBRACE" => token::OpenDelim(token::Brace), - "RARROW" => token::Rarrow, - "LIT_STR" => 
token::LitStr(Name(0)), + "RARROW" => token::RArrow, + "LIT_STR" => token::Literal(token::Str_(Name(0))), "DOTDOT" => token::DotDot, "MOD_SEP" => token::ModSep, "DOTDOTDOT" => token::DotDotDot, @@ -71,17 +71,17 @@ fn parse_token_list(file: &str) -> HashMap { "ANDAND" => token::AndAnd, "AT" => token::At, "LBRACKET" => token::OpenDelim(token::Bracket), - "LIT_STR_RAW" => token::LitStrRaw(Name(0), 0), + "LIT_STR_RAW" => token::Literal(token::StrRaw(Name(0), 0)), "RPAREN" => token::CloseDelim(token::Paren), "SLASH" => token::BinOp(token::Slash), "COMMA" => token::Comma, "LIFETIME" => token::Lifetime(ast::Ident { name: Name(0), ctxt: 0 }), "CARET" => token::BinOp(token::Caret), "TILDE" => token::Tilde, - "IDENT" => token::Id(), + "IDENT" => id(), "PLUS" => token::BinOp(token::Plus), - "LIT_CHAR" => token::LitChar(Name(0)), - "LIT_BYTE" => token::LitByte(Name(0)), + "LIT_CHAR" => token::Literal(token::Char(Name(0))), + "LIT_BYTE" => token::Literal(token::Byte(Name(0))), "EQ" => token::Eq, "RBRACKET" => token::CloseDelim(token::Bracket), "COMMENT" => token::Comment, @@ -95,9 +95,9 @@ fn parse_token_list(file: &str) -> HashMap { "BINOP" => token::BinOp(token::Plus), "POUND" => token::Pound, "OROR" => token::OrOr, - "LIT_INTEGER" => token::LitInteger(Name(0)), + "LIT_INTEGER" => token::Literal(token::Integer(Name(0))), "BINOPEQ" => token::BinOpEq(token::Plus), - "LIT_FLOAT" => token::LitFloat(Name(0)), + "LIT_FLOAT" => token::Literal(token::Float(Name(0))), "WHITESPACE" => token::Whitespace, "UNDERSCORE" => token::Underscore, "MINUS" => token::BinOp(token::Minus), @@ -107,8 +107,8 @@ fn parse_token_list(file: &str) -> HashMap { "OR" => token::BinOp(token::Or), "GT" => token::Gt, "LE" => token::Le, - "LIT_BINARY" => token::LitBinary(Name(0)), - "LIT_BINARY_RAW" => token::LitBinaryRaw(Name(0), 0), + "LIT_BINARY" => token::Literal(token::Binary(Name(0))), + "LIT_BINARY_RAW" => token::Literal(token::BinaryRaw(Name(0), 0)), _ => continue, }; @@ -119,7 +119,7 @@ fn 
parse_token_list(file: &str) -> HashMap { res } -fn str_to_binop(s: &str) -> BinOpToken { +fn str_to_binop(s: &str) -> token::BinOpToken { match s { "+" => token::Plus, "/" => token::Slash, @@ -167,7 +167,7 @@ fn count(lit: &str) -> uint { lit.chars().take_while(|c| *c == '#').count() } -fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { +fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { let re = regex!( r"\[@(?P\d+),(?P\d+):(?P\d+)='(?P.+?)',<(?P-?\d+)>,\d+:\d+]" ); @@ -178,7 +178,7 @@ fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { let toknum = m.name("toknum"); let content = m.name("content"); - let proto_tok = tokens.get(&toknum).expect(format!("didn't find token {} in the map", + let proto_tok = tokens.get(toknum).expect(format!("didn't find token {} in the map", toknum).as_slice()); let nm = parse::token::intern(content); @@ -189,22 +189,25 @@ fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { token::BinOp(..) => token::BinOp(str_to_binop(content)), token::BinOpEq(..) => token::BinOpEq(str_to_binop(content.slice_to( content.len() - 1))), - token::LitStr(..) => token::LitStr(fix(content)), - token::LitStrRaw(..) => token::LitStrRaw(fix(content), count(content)), - token::LitChar(..) => token::LitChar(fixchar(content)), - token::LitByte(..) => token::LitByte(fixchar(content)), + token::Literal(token::Str_(..)) => token::Literal(token::Str_(fix(content))), + token::Literal(token::StrRaw(..)) => token::Literal(token::StrRaw(fix(content), + count(content))), + token::Literal(token::Char(..)) => token::Literal(token::Char(fixchar(content))), + token::Literal(token::Byte(..)) => token::Literal(token::Byte(fixchar(content))), token::DocComment(..) => token::DocComment(nm), - token::LitInteger(..) => token::LitInteger(nm), - token::LitFloat(..) => token::LitFloat(nm), - token::LitBinary(..) => token::LitBinary(nm), - token::LitBinaryRaw(..) 
=> token::LitBinaryRaw(fix(content), count(content)), + token::Literal(token::Integer(..)) => token::Literal(token::Integer(nm)), + token::Literal(token::Float(..)) => token::Literal(token::Float(nm)), + token::Literal(token::Binary(..)) => token::Literal(token::Binary(nm)), + token::Literal(token::BinaryRaw(..)) => token::Literal(token::BinaryRaw(fix(content), + count(content))), token::Ident(..) => token::Ident(ast::Ident { name: nm, ctxt: 0 }, token::ModName), token::Lifetime(..) => token::Lifetime(ast::Ident { name: nm, ctxt: 0 }), ref t => t.clone() }; - let offset = if real_tok == EOF { + let offset = if real_tok == token::Eof + { 1 } else { 0 @@ -222,7 +225,7 @@ fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { } } -fn tok_cmp(a: &Token, b: &Token) -> bool { +fn tok_cmp(a: &token::Token, b: &token::Token) -> bool { match a { &token::Ident(id, _) => match b { &token::Ident(id2, _) => id == id2, @@ -240,17 +243,17 @@ fn main() { let args = std::os::args(); - let mut token_file = File::open(&Path::new(args.get(2).as_slice())); + let mut token_file = File::open(&Path::new(args[2].as_slice())); let token_map = parse_token_list(token_file.read_to_string().unwrap().as_slice()); let mut stdin = std::io::stdin(); let mut antlr_tokens = stdin.lines().map(|l| parse_antlr_token(l.unwrap().as_slice().trim(), &token_map)); - let code = File::open(&Path::new(args.get(1).as_slice())).unwrap().read_to_string().unwrap(); + let code = File::open(&Path::new(args[1].as_slice())).unwrap().read_to_string().unwrap(); let options = config::basic_options(); let session = session::build_session(options, None, - syntax::diagnostics::registry::Registry::new([])); + syntax::diagnostics::registry::Registry::new(&[])); let filemap = parse::string_to_filemap(&session.parse_sess, code, String::from_str("")); @@ -258,7 +261,7 @@ fn main() { for antlr_tok in antlr_tokens { let rustc_tok = next(&mut lexer); - if rustc_tok.tok == EOF && antlr_tok.tok == EOF { + if rustc_tok.tok 
== token::Eof && antlr_tok.tok == token::Eof { continue } @@ -284,19 +287,19 @@ fn main() { ) matches!( - LitByte(..), - LitChar(..), - LitInteger(..), - LitFloat(..), - LitStr(..), - LitStrRaw(..), - LitBinary(..), - LitBinaryRaw(..), - Ident(..), - Lifetime(..), - Interpolated(..), - DocComment(..), - Shebang(..) + token::Literal(token::Byte(..)), + token::Literal(token::Char(..)), + token::Literal(token::Integer(..)), + token::Literal(token::Float(..)), + token::Literal(token::Str_(..)), + token::Literal(token::StrRaw(..)), + token::Literal(token::Binary(..)), + token::Literal(token::BinaryRaw(..)), + token::Ident(..), + token::Lifetime(..), + token::Interpolated(..), + token::DocComment(..), + token::Shebang(..) ); } } diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index d445da9d1340d..111650f565cf6 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -128,12 +128,17 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader, } } - // text literals - token::LitByte(..) | token::LitBinary(..) | token::LitBinaryRaw(..) | - token::LitChar(..) | token::LitStr(..) | token::LitStrRaw(..) => "string", - - // number literals - token::LitInteger(..) | token::LitFloat(..) => "number", + token::Literal(lit, _suf) => { + match lit { + // text literals + token::Byte(..) | token::Char(..) | + token::Binary(..) | token::BinaryRaw(..) | + token::Str_(..) | token::StrRaw(..) => "string", + + // number literals + token::Integer(..) | token::Float(..) 
=> "number", + } + } // keywords are also included in the identifier set token::Ident(ident, _is_mod_sep) => { diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 15e14902727f5..7b16c08785926 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -838,7 +838,7 @@ impl TokenTree { tts: vec![TtToken(sp, token::Ident(token::str_to_ident("doc"), token::Plain)), TtToken(sp, token::Eq), - TtToken(sp, token::LitStr(name))], + TtToken(sp, token::Literal(token::Str_(name), None))], close_span: sp, })) } diff --git a/src/libsyntax/diagnostics/plugin.rs b/src/libsyntax/diagnostics/plugin.rs index d077fbd7bf00f..281bde3129aba 100644 --- a/src/libsyntax/diagnostics/plugin.rs +++ b/src/libsyntax/diagnostics/plugin.rs @@ -87,7 +87,7 @@ pub fn expand_register_diagnostic<'cx>(ecx: &'cx mut ExtCtxt, }, [ast::TtToken(_, token::Ident(ref code, _)), ast::TtToken(_, token::Comma), - ast::TtToken(_, token::LitStrRaw(description, _))] => { + ast::TtToken(_, token::Literal(token::StrRaw(description, _), None))] => { (code, Some(description)) } _ => unreachable!() diff --git a/src/libsyntax/ext/quote.rs b/src/libsyntax/ext/quote.rs index ec69175707746..eaa3632cf499e 100644 --- a/src/libsyntax/ext/quote.rs +++ b/src/libsyntax/ext/quote.rs @@ -542,6 +542,16 @@ fn mk_delim(cx: &ExtCtxt, sp: Span, delim: token::DelimToken) -> P { #[allow(non_upper_case_globals)] fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> P { + macro_rules! 
mk_lit { + ($name: expr, $suffix: expr, $($args: expr),*) => {{ + let inner = cx.expr_call(sp, mk_token_path(cx, sp, $name), vec![$($args),*]); + let suffix = match $suffix { + Some(name) => cx.expr_some(sp, mk_name(cx, sp, ast::Ident::new(name))), + None => cx.expr_none(sp) + }; + cx.expr_call(sp, mk_token_path(cx, sp, "Literal"), vec![inner, suffix]) + }} + } match *tok { token::BinOp(binop) => { return cx.expr_call(sp, mk_token_path(cx, sp, "BinOp"), vec!(mk_binop(cx, sp, binop))); @@ -560,38 +570,32 @@ fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> P { vec![mk_delim(cx, sp, delim)]); } - token::LitByte(i) => { + token::Literal(token::Byte(i), suf) => { let e_byte = mk_name(cx, sp, i.ident()); - - return cx.expr_call(sp, mk_token_path(cx, sp, "LitByte"), vec!(e_byte)); + return mk_lit!("Byte", suf, e_byte); } - token::LitChar(i) => { + token::Literal(token::Char(i), suf) => { let e_char = mk_name(cx, sp, i.ident()); - - return cx.expr_call(sp, mk_token_path(cx, sp, "LitChar"), vec!(e_char)); + return mk_lit!("Char", suf, e_char); } - token::LitInteger(i) => { + token::Literal(token::Integer(i), suf) => { let e_int = mk_name(cx, sp, i.ident()); - return cx.expr_call(sp, mk_token_path(cx, sp, "LitInteger"), vec!(e_int)); + return mk_lit!("Integer", suf, e_int); } - token::LitFloat(fident) => { + token::Literal(token::Float(fident), suf) => { let e_fident = mk_name(cx, sp, fident.ident()); - return cx.expr_call(sp, mk_token_path(cx, sp, "LitFloat"), vec!(e_fident)); + return mk_lit!("Float", suf, e_fident); } - token::LitStr(ident) => { - return cx.expr_call(sp, - mk_token_path(cx, sp, "LitStr"), - vec!(mk_name(cx, sp, ident.ident()))); + token::Literal(token::Str_(ident), suf) => { + return mk_lit!("Str_", suf, mk_name(cx, sp, ident.ident())) } - token::LitStrRaw(ident, n) => { - return cx.expr_call(sp, - mk_token_path(cx, sp, "LitStrRaw"), - vec!(mk_name(cx, sp, ident.ident()), cx.expr_uint(sp, n))); + token::Literal(token::StrRaw(ident, n), suf) => { 
+ return mk_lit!("StrRaw", suf, mk_name(cx, sp, ident.ident()), cx.expr_uint(sp, n)) } token::Ident(ident, style) => { diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 01a66243a965f..fbca4868255ff 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -369,6 +369,25 @@ impl<'a> StringReader<'a> { self.nextnextch() == Some(c) } + /// Eats an optional identifier-like name (`XID_start XID_continue*`), if possible. + fn scan_optional_raw_name(&mut self) -> Option { + if !ident_start(self.curr) { + return None + } + let start = self.last_pos; + while ident_continue(self.curr) { + self.bump(); + } + + self.with_str_from(start, |string| { + if string == "_" { + None + } else { + Some(token::intern(string)) + } + }) + } + /// PRECONDITION: self.curr is not whitespace /// Eats any kind of comment. fn scan_comment(&mut self) -> Option { @@ -638,7 +657,7 @@ impl<'a> StringReader<'a> { } /// Lex a LIT_INTEGER or a LIT_FLOAT - fn scan_number(&mut self, c: char) -> token::Token { + fn scan_number(&mut self, c: char) -> token::Lit { let mut num_digits; let mut base = 10; let start_bpos = self.last_pos; @@ -653,19 +672,9 @@ impl<'a> StringReader<'a> { '0'...'9' | '_' | '.' 
=> { num_digits = self.scan_digits(10) + 1; } - 'u' | 'i' => { - self.scan_int_suffix(); - return token::LitInteger(self.name_from(start_bpos)); - }, - 'f' => { - let last_pos = self.last_pos; - self.scan_float_suffix(); - self.check_float_base(start_bpos, last_pos, base); - return token::LitFloat(self.name_from(start_bpos)); - } _ => { // just a 0 - return token::LitInteger(self.name_from(start_bpos)); + return token::Integer(self.name_from(start_bpos)); } } } else if c.is_digit_radix(10) { @@ -676,9 +685,7 @@ impl<'a> StringReader<'a> { if num_digits == 0 { self.err_span_(start_bpos, self.last_pos, "no valid digits found for number"); - // eat any suffix - self.scan_int_suffix(); - return token::LitInteger(token::intern("0")); + return token::Integer(token::intern("0")); } // might be a float, but don't be greedy if this is actually an @@ -692,29 +699,20 @@ impl<'a> StringReader<'a> { if self.curr.unwrap_or('\0').is_digit_radix(10) { self.scan_digits(10); self.scan_float_exponent(); - self.scan_float_suffix(); } let last_pos = self.last_pos; self.check_float_base(start_bpos, last_pos, base); - return token::LitFloat(self.name_from(start_bpos)); - } else if self.curr_is('f') { - // or it might be an integer literal suffixed as a float - self.scan_float_suffix(); - let last_pos = self.last_pos; - self.check_float_base(start_bpos, last_pos, base); - return token::LitFloat(self.name_from(start_bpos)); + return token::Float(self.name_from(start_bpos)); } else { // it might be a float if it has an exponent if self.curr_is('e') || self.curr_is('E') { self.scan_float_exponent(); - self.scan_float_suffix(); let last_pos = self.last_pos; self.check_float_base(start_bpos, last_pos, base); - return token::LitFloat(self.name_from(start_bpos)); + return token::Float(self.name_from(start_bpos)); } // but we certainly have an integer! 
- self.scan_int_suffix(); - return token::LitInteger(self.name_from(start_bpos)); + return token::Integer(self.name_from(start_bpos)); } } @@ -850,55 +848,6 @@ impl<'a> StringReader<'a> { true } - /// Scan over an int literal suffix. - fn scan_int_suffix(&mut self) { - match self.curr { - Some('i') | Some('u') => { - self.bump(); - - if self.curr_is('8') { - self.bump(); - } else if self.curr_is('1') { - if !self.nextch_is('6') { - self.err_span_(self.last_pos, self.pos, - "illegal int suffix"); - } else { - self.bump(); self.bump(); - } - } else if self.curr_is('3') { - if !self.nextch_is('2') { - self.err_span_(self.last_pos, self.pos, - "illegal int suffix"); - } else { - self.bump(); self.bump(); - } - } else if self.curr_is('6') { - if !self.nextch_is('4') { - self.err_span_(self.last_pos, self.pos, - "illegal int suffix"); - } else { - self.bump(); self.bump(); - } - } - }, - _ => { } - } - } - - /// Scan over a float literal suffix - fn scan_float_suffix(&mut self) { - if self.curr_is('f') { - if (self.nextch_is('3') && self.nextnextch_is('2')) - || (self.nextch_is('6') && self.nextnextch_is('4')) { - self.bump(); - self.bump(); - self.bump(); - } else { - self.err_span_(self.last_pos, self.pos, "illegal float suffix"); - } - } - } - /// Scan over a float exponent. 
fn scan_float_exponent(&mut self) { if self.curr_is('e') || self.curr_is('E') { @@ -967,7 +916,10 @@ impl<'a> StringReader<'a> { } if is_dec_digit(c) { - return self.scan_number(c.unwrap()); + let num = self.scan_number(c.unwrap()); + let suffix = self.scan_optional_raw_name(); + debug!("next_token_inner: scanned number {}, {}", num, suffix); + return token::Literal(num, suffix) } if self.read_embedded_ident { @@ -1126,17 +1078,19 @@ impl<'a> StringReader<'a> { } let id = if valid { self.name_from(start) } else { token::intern("0") }; self.bump(); // advance curr past token - return token::LitChar(id); + let suffix = self.scan_optional_raw_name(); + return token::Literal(token::Char(id), suffix); } 'b' => { self.bump(); - return match self.curr { + let lit = match self.curr { Some('\'') => self.scan_byte(), Some('"') => self.scan_byte_string(), Some('r') => self.scan_raw_byte_string(), _ => unreachable!() // Should have been a token::Ident above. }; - + let suffix = self.scan_optional_raw_name(); + return token::Literal(lit, suffix); } '"' => { let start_bpos = self.last_pos; @@ -1157,7 +1111,8 @@ impl<'a> StringReader<'a> { let id = if valid { self.name_from(start_bpos + BytePos(1)) } else { token::intern("??") }; self.bump(); - return token::LitStr(id); + let suffix = self.scan_optional_raw_name(); + return token::Literal(token::Str_(id), suffix); } 'r' => { let start_bpos = self.last_pos; @@ -1224,7 +1179,8 @@ impl<'a> StringReader<'a> { } else { token::intern("??") }; - return token::LitStrRaw(id, hash_count); + let suffix = self.scan_optional_raw_name(); + return token::Literal(token::StrRaw(id, hash_count), suffix); } '-' => { if self.nextch_is('>') { @@ -1293,7 +1249,7 @@ impl<'a> StringReader<'a> { || (self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('[')) } - fn scan_byte(&mut self) -> token::Token { + fn scan_byte(&mut self) -> token::Lit { self.bump(); let start = self.last_pos; @@ -1314,10 +1270,10 @@ impl<'a> StringReader<'a> { let id = 
if valid { self.name_from(start) } else { token::intern("??") }; self.bump(); // advance curr past token - return token::LitByte(id); + return token::Byte(id); } - fn scan_byte_string(&mut self) -> token::Token { + fn scan_byte_string(&mut self) -> token::Lit { self.bump(); let start = self.last_pos; let mut valid = true; @@ -1336,10 +1292,10 @@ impl<'a> StringReader<'a> { } let id = if valid { self.name_from(start) } else { token::intern("??") }; self.bump(); - return token::LitBinary(id); + return token::Binary(id); } - fn scan_raw_byte_string(&mut self) -> token::Token { + fn scan_raw_byte_string(&mut self) -> token::Lit { let start_bpos = self.last_pos; self.bump(); let mut hash_count = 0u; @@ -1387,8 +1343,9 @@ impl<'a> StringReader<'a> { self.bump(); } self.bump(); - return token::LitBinaryRaw(self.name_from_to(content_start_bpos, content_end_bpos), - hash_count); + return token::BinaryRaw(self.name_from_to(content_start_bpos, + content_end_bpos), + hash_count); } } @@ -1535,17 +1492,17 @@ mod test { #[test] fn character_a() { assert_eq!(setup(&mk_sh(), "'a'".to_string()).next_token().tok, - token::LitChar(token::intern("a"))); + token::Literal(token::Char(token::intern("a")), None)); } #[test] fn character_space() { assert_eq!(setup(&mk_sh(), "' '".to_string()).next_token().tok, - token::LitChar(token::intern(" "))); + token::Literal(token::Char(token::intern(" ")), None)); } #[test] fn character_escaped() { assert_eq!(setup(&mk_sh(), "'\\n'".to_string()).next_token().tok, - token::LitChar(token::intern("\\n"))); + token::Literal(token::Char(token::intern("\\n")), None)); } #[test] fn lifetime_name() { @@ -1557,7 +1514,41 @@ mod test { assert_eq!(setup(&mk_sh(), "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token() .tok, - token::LitStrRaw(token::intern("\"#a\\b\x00c\""), 3)); + token::Literal(token::StrRaw(token::intern("\"#a\\b\x00c\""), 3), None)); + } + + #[test] fn literal_suffixes() { + macro_rules! 
test { + ($input: expr, $tok_type: ident, $tok_contents: expr) => {{ + assert_eq!(setup(&mk_sh(), format!("{}suffix", $input)).next_token().tok, + token::Literal(token::$tok_type(token::intern($tok_contents)), + Some(token::intern("suffix")))); + // with a whitespace separator: + assert_eq!(setup(&mk_sh(), format!("{} suffix", $input)).next_token().tok, + token::Literal(token::$tok_type(token::intern($tok_contents)), + None)); + }} + } + + test!("'a'", Char, "a"); + test!("b'a'", Byte, "a"); + test!("\"a\"", Str_, "a"); + test!("b\"a\"", Binary, "a"); + test!("1234", Integer, "1234"); + test!("0b101", Integer, "0b101"); + test!("0xABC", Integer, "0xABC"); + test!("1.0", Float, "1.0"); + test!("1.0e10", Float, "1.0e10"); + + assert_eq!(setup(&mk_sh(), "2u".to_string()).next_token().tok, + token::Literal(token::Integer(token::intern("2")), + Some(token::intern("u")))); + assert_eq!(setup(&mk_sh(), "r###\"raw\"###suffix".to_string()).next_token().tok, + token::Literal(token::StrRaw(token::intern("raw"), 3), + Some(token::intern("suffix")))); + assert_eq!(setup(&mk_sh(), "br###\"raw\"###suffix".to_string()).next_token().tok, + token::Literal(token::BinaryRaw(token::intern("raw"), 3), + Some(token::intern("suffix")))); } #[test] fn line_doc_comments() { @@ -1573,7 +1564,7 @@ mod test { token::Comment => { }, _ => panic!("expected a comment!") } - assert_eq!(lexer.next_token().tok, token::LitChar(token::intern("a"))); + assert_eq!(lexer.next_token().tok, token::Literal(token::Char(token::intern("a")), None)); } } diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 2810db4eaddd8..d111108269dfe 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -511,28 +511,41 @@ pub fn raw_str_lit(lit: &str) -> String { res } -pub fn float_lit(s: &str) -> ast::Lit_ { - debug!("float_lit: {}", s); - // FIXME #2252: bounds checking float literals is defered until trans - let s2 = s.chars().filter(|&c| c != '_').collect::(); - let s = 
s2.as_slice(); - - let mut ty = None; - - if s.ends_with("f32") { - ty = Some(ast::TyF32); - } else if s.ends_with("f64") { - ty = Some(ast::TyF64); - } +// check if `s` looks like i32 or u1234 etc. +fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { + s.len() > 1 && + first_chars.contains(&s.char_at(0)) && + s.slice_from(1).chars().all(|c| '0' <= c && c <= '9') +} +fn filtered_float_lit(data: token::InternedString, suffix: Option<&str>, + sd: &SpanHandler, sp: Span) -> ast::Lit_ { + debug!("filtered_float_lit: {}, {}", data, suffix); + match suffix { + Some("f32") => ast::LitFloat(data, ast::TyF32), + Some("f64") => ast::LitFloat(data, ast::TyF64), + Some(suf) => { + if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) { + // if it looks like a width, let's try to be helpful. + sd.span_err(sp, &*format!("illegal width `{}` for float literal, \ + valid widths are 32 and 64", suf.slice_from(1))); + } else { + sd.span_err(sp, &*format!("illegal suffix `{}` for float literal, \ + valid suffixes are `f32` and `f64`", suf)); + } - match ty { - Some(t) => { - ast::LitFloat(token::intern_and_get_ident(s.slice_to(s.len() - t.suffix_len())), t) - }, - None => ast::LitFloatUnsuffixed(token::intern_and_get_ident(s)) + ast::LitFloatUnsuffixed(data) + } + None => ast::LitFloatUnsuffixed(data) } } +pub fn float_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> ast::Lit_ { + debug!("float_lit: {}, {}", s, suffix); + // FIXME #2252: bounds checking float literals is deferred until trans + let s = s.chars().filter(|&c| c != '_').collect::(); + let data = token::intern_and_get_ident(&*s); + filtered_float_lit(data, suffix, sd, sp) +} /// Parse a string representing a byte literal into its final form. 
Similar to `char_lit` pub fn byte_lit(lit: &str) -> (u8, uint) { @@ -626,24 +639,19 @@ pub fn binary_lit(lit: &str) -> Rc> { Rc::new(res) } -pub fn integer_lit(s: &str, sd: &SpanHandler, sp: Span) -> ast::Lit_ { +pub fn integer_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> ast::Lit_ { // s can only be ascii, byte indexing is fine let s2 = s.chars().filter(|&c| c != '_').collect::(); let mut s = s2.as_slice(); - debug!("parse_integer_lit: {}", s); - - if s.len() == 1 { - let n = (s.char_at(0)).to_digit(10).unwrap(); - return ast::LitInt(n as u64, ast::UnsuffixedIntLit(ast::Sign::new(n))); - } + debug!("integer_lit: {}, {}", s, suffix); let mut base = 10; let orig = s; let mut ty = ast::UnsuffixedIntLit(ast::Plus); - if s.char_at(0) == '0' { + if s.char_at(0) == '0' && s.len() > 1 { match s.char_at(1) { 'x' => base = 16, 'o' => base = 8, @@ -652,57 +660,56 @@ pub fn integer_lit(s: &str, sd: &SpanHandler, sp: Span) -> ast::Lit_ { } } + // 1f64 and 2f32 etc. are valid float literals. 
+ match suffix { + Some(suf) if looks_like_width_suffix(&['f'], suf) => { + match base { + 16u => sd.span_err(sp, "hexadecimal float literal is not supported"), + 8u => sd.span_err(sp, "octal float literal is not supported"), + 2u => sd.span_err(sp, "binary float literal is not supported"), + _ => () + } + let ident = token::intern_and_get_ident(&*s); + return filtered_float_lit(ident, suffix, sd, sp) + } + _ => {} + } + if base != 10 { s = s.slice_from(2); } - let last = s.len() - 1; - match s.char_at(last) { - 'i' => ty = ast::SignedIntLit(ast::TyI, ast::Plus), - 'u' => ty = ast::UnsignedIntLit(ast::TyU), - '8' => { - if s.len() > 2 { - match s.char_at(last - 1) { - 'i' => ty = ast::SignedIntLit(ast::TyI8, ast::Plus), - 'u' => ty = ast::UnsignedIntLit(ast::TyU8), - _ => { } - } - } - }, - '6' => { - if s.len() > 3 && s.char_at(last - 1) == '1' { - match s.char_at(last - 2) { - 'i' => ty = ast::SignedIntLit(ast::TyI16, ast::Plus), - 'u' => ty = ast::UnsignedIntLit(ast::TyU16), - _ => { } - } - } - }, - '2' => { - if s.len() > 3 && s.char_at(last - 1) == '3' { - match s.char_at(last - 2) { - 'i' => ty = ast::SignedIntLit(ast::TyI32, ast::Plus), - 'u' => ty = ast::UnsignedIntLit(ast::TyU32), - _ => { } - } - } - }, - '4' => { - if s.len() > 3 && s.char_at(last - 1) == '6' { - match s.char_at(last - 2) { - 'i' => ty = ast::SignedIntLit(ast::TyI64, ast::Plus), - 'u' => ty = ast::UnsignedIntLit(ast::TyU64), - _ => { } + if let Some(suf) = suffix { + if suf.is_empty() { sd.span_bug(sp, "found empty literal suffix in Some")} + ty = match suf { + "i" => ast::SignedIntLit(ast::TyI, ast::Plus), + "i8" => ast::SignedIntLit(ast::TyI8, ast::Plus), + "i16" => ast::SignedIntLit(ast::TyI16, ast::Plus), + "i32" => ast::SignedIntLit(ast::TyI32, ast::Plus), + "i64" => ast::SignedIntLit(ast::TyI64, ast::Plus), + "u" => ast::UnsignedIntLit(ast::TyU), + "u8" => ast::UnsignedIntLit(ast::TyU8), + "u16" => ast::UnsignedIntLit(ast::TyU16), + "u32" => ast::UnsignedIntLit(ast::TyU32), + 
"u64" => ast::UnsignedIntLit(ast::TyU64), + _ => { + // i and u look like widths, so lets + // give an error message along those lines + if looks_like_width_suffix(&['i', 'u'], suf) { + sd.span_err(sp, &*format!("illegal width `{}` for integer literal; \ + valid widths are 8, 16, 32 and 64", + suf.slice_from(1))); + } else { + sd.span_err(sp, &*format!("illegal suffix `{}` for numeric literal", suf)); } + + ty } - }, - _ => { } + } } - debug!("The suffix is {}, base {}, the new string is {}, the original \ - string was {}", ty, base, s, orig); - - s = s.slice_to(s.len() - ty.suffix_len()); + debug!("integer_lit: the type is {}, base {}, the new string is {}, the original \ + string was {}, the original suffix was {}", ty, base, s, orig, suffix); let res: u64 = match ::std::num::from_str_radix(s, base) { Some(r) => r, diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 40c4ac9f8c044..85364b8f65ffa 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -646,6 +646,20 @@ impl<'a> Parser<'a> { } } + pub fn expect_no_suffix(&mut self, sp: Span, kind: &str, suffix: Option) { + match suffix { + None => {/* everything ok */} + Some(suf) => { + let text = suf.as_str(); + if text.is_empty() { + self.span_bug(sp, "found empty literal suffix in Some") + } + self.span_err(sp, &*format!("{} with a suffix is illegal", kind)); + } + } + } + + /// Attempt to consume a `<`. If `<<` is seen, replace it with a single /// `<` and continue. If a `<` is not seen, return false. /// @@ -968,6 +982,9 @@ impl<'a> Parser<'a> { pub fn span_err(&mut self, sp: Span, m: &str) { self.sess.span_diagnostic.span_err(sp, m) } + pub fn span_bug(&mut self, sp: Span, m: &str) -> ! { + self.sess.span_diagnostic.span_bug(sp, m) + } pub fn abort_if_errors(&mut self) { self.sess.span_diagnostic.handler().abort_if_errors(); } @@ -1640,24 +1657,53 @@ impl<'a> Parser<'a> { /// Matches token_lit = LIT_INTEGER | ... 
pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ { match *tok { - token::LitByte(i) => LitByte(parse::byte_lit(i.as_str()).val0()), - token::LitChar(i) => LitChar(parse::char_lit(i.as_str()).val0()), - token::LitInteger(s) => parse::integer_lit(s.as_str(), - &self.sess.span_diagnostic, - self.last_span), - token::LitFloat(s) => parse::float_lit(s.as_str()), - token::LitStr(s) => { - LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()), - ast::CookedStr) - } - token::LitStrRaw(s, n) => { - LitStr(token::intern_and_get_ident(parse::raw_str_lit(s.as_str()).as_slice()), - ast::RawStr(n)) + token::Literal(lit, suf) => { + let (suffix_illegal, out) = match lit { + token::Byte(i) => (true, LitByte(parse::byte_lit(i.as_str()).val0())), + token::Char(i) => (true, LitChar(parse::char_lit(i.as_str()).val0())), + + // there are some valid suffixes for integer and + // float literals, so all the handling is done + // internally. + token::Integer(s) => { + (false, parse::integer_lit(s.as_str(), + suf.as_ref().map(|s| s.as_str()), + &self.sess.span_diagnostic, + self.last_span)) + } + token::Float(s) => { + (false, parse::float_lit(s.as_str(), + suf.as_ref().map(|s| s.as_str()), + &self.sess.span_diagnostic, + self.last_span)) + } + + token::Str_(s) => { + (true, + LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()), + ast::CookedStr)) + } + token::StrRaw(s, n) => { + (true, + LitStr( + token::intern_and_get_ident( + parse::raw_str_lit(s.as_str()).as_slice()), + ast::RawStr(n))) + } + token::Binary(i) => + (true, LitBinary(parse::binary_lit(i.as_str()))), + token::BinaryRaw(i, _) => + (true, + LitBinary(Rc::new(i.as_str().as_bytes().iter().map(|&x| x).collect()))), + }; + + if suffix_illegal { + let sp = self.last_span; + self.expect_no_suffix(sp, &*format!("{} literal", lit.short_name()), suf) + } + + out } - token::LitBinary(i) => - LitBinary(parse::binary_lit(i.as_str())), - token::LitBinaryRaw(i, _) => - 
LitBinary(Rc::new(i.as_str().as_bytes().iter().map(|&x| x).collect())), _ => { self.unexpected_last(tok); } } } @@ -2424,7 +2470,10 @@ impl<'a> Parser<'a> { } } } - token::LitInteger(n) => { + token::Literal(token::Integer(n), suf) => { + let sp = self.span; + self.expect_no_suffix(sp, "tuple index", suf); + let index = n.as_str(); let dot = self.last_span.hi; hi = self.span.hi; @@ -2449,7 +2498,7 @@ impl<'a> Parser<'a> { } } } - token::LitFloat(n) => { + token::Literal(token::Float(n), _suf) => { self.bump(); let last_span = self.last_span; let fstr = n.as_str(); @@ -5085,12 +5134,17 @@ impl<'a> Parser<'a> { self.expect(&token::Semi); (path, the_ident) }, - token::LitStr(..) | token::LitStrRaw(..) => { - let path = self.parse_str(); + token::Literal(token::Str_(..), suf) | token::Literal(token::StrRaw(..), suf) => { + let sp = self.span; + self.expect_no_suffix(sp, "extern crate name", suf); + // forgo the internal suffix check of `parse_str` to + // avoid repeats (this unwrap will always succeed due + // to the restriction of the `match`) + let (s, style, _) = self.parse_optional_str().unwrap(); self.expect_keyword(keywords::As); let the_ident = self.parse_ident(); self.expect(&token::Semi); - (Some(path), the_ident) + (Some((s, style)), the_ident) }, _ => { let span = self.span; @@ -5267,7 +5321,9 @@ impl<'a> Parser<'a> { /// the `extern` keyword, if one is found. 
fn parse_opt_abi(&mut self) -> Option { match self.token { - token::LitStr(s) | token::LitStrRaw(s, _) => { + token::Literal(token::Str_(s), suf) | token::Literal(token::StrRaw(s, _), suf) => { + let sp = self.span; + self.expect_no_suffix(sp, "ABI spec", suf); self.bump(); let the_string = s.as_str(); match abi::lookup(the_string) { @@ -5902,21 +5958,27 @@ impl<'a> Parser<'a> { } pub fn parse_optional_str(&mut self) - -> Option<(InternedString, ast::StrStyle)> { - let (s, style) = match self.token { - token::LitStr(s) => (self.id_to_interned_str(s.ident()), ast::CookedStr), - token::LitStrRaw(s, n) => { - (self.id_to_interned_str(s.ident()), ast::RawStr(n)) + -> Option<(InternedString, ast::StrStyle, Option)> { + let ret = match self.token { + token::Literal(token::Str_(s), suf) => { + (self.id_to_interned_str(s.ident()), ast::CookedStr, suf) + } + token::Literal(token::StrRaw(s, n), suf) => { + (self.id_to_interned_str(s.ident()), ast::RawStr(n), suf) } _ => return None }; self.bump(); - Some((s, style)) + Some(ret) } pub fn parse_str(&mut self) -> (InternedString, StrStyle) { match self.parse_optional_str() { - Some(s) => { s } + Some((s, style, suf)) => { + let sp = self.last_span; + self.expect_no_suffix(sp, "str literal", suf); + (s, style) + } _ => self.fatal("expected string literal") } } diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 298328d73efb0..4272b57a4dc51 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -12,6 +12,7 @@ pub use self::BinOpToken::*; pub use self::Nonterminal::*; pub use self::DelimToken::*; pub use self::IdentStyle::*; +pub use self::Lit::*; pub use self::Token::*; use ast; @@ -59,6 +60,31 @@ pub enum IdentStyle { Plain, } +#[deriving(Clone, Encodable, Decodable, PartialEq, Eq, Hash, Show)] +pub enum Lit { + Byte(ast::Name), + Char(ast::Name), + Integer(ast::Name), + Float(ast::Name), + Str_(ast::Name), + StrRaw(ast::Name, uint), /* raw str delimited by n hash symbols */ + 
Binary(ast::Name), + BinaryRaw(ast::Name, uint), /* raw binary str delimited by n hash symbols */ +} + +impl Lit { + pub fn short_name(&self) -> &'static str { + match *self { + Byte(_) => "byte", + Char(_) => "char", + Integer(_) => "integer", + Float(_) => "float", + Str_(_) | StrRaw(..) => "str", + Binary(_) | BinaryRaw(..) => "binary str" + } + } +} + #[allow(non_camel_case_types)] #[deriving(Clone, Encodable, Decodable, PartialEq, Eq, Hash, Show)] pub enum Token { @@ -98,14 +124,7 @@ pub enum Token { CloseDelim(DelimToken), /* Literals */ - LitByte(ast::Name), - LitChar(ast::Name), - LitInteger(ast::Name), - LitFloat(ast::Name), - LitStr(ast::Name), - LitStrRaw(ast::Name, uint), /* raw str delimited by n hash symbols */ - LitBinary(ast::Name), - LitBinaryRaw(ast::Name, uint), /* raw binary str delimited by n hash symbols */ + Literal(Lit, Option), /* Name components */ Ident(ast::Ident, IdentStyle), @@ -145,14 +164,7 @@ impl Token { Ident(_, _) => true, Underscore => true, Tilde => true, - LitByte(_) => true, - LitChar(_) => true, - LitInteger(_) => true, - LitFloat(_) => true, - LitStr(_) => true, - LitStrRaw(_, _) => true, - LitBinary(_) => true, - LitBinaryRaw(_, _) => true, + Literal(_, _) => true, Pound => true, At => true, Not => true, @@ -173,15 +185,8 @@ impl Token { /// Returns `true` if the token is any literal pub fn is_lit(&self) -> bool { match *self { - LitByte(_) => true, - LitChar(_) => true, - LitInteger(_) => true, - LitFloat(_) => true, - LitStr(_) => true, - LitStrRaw(_, _) => true, - LitBinary(_) => true, - LitBinaryRaw(_, _) => true, - _ => false, + Literal(_, _) => true, + _ => false, } } diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index e6e0c33a42dbd..642ffa3745d9f 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -236,18 +236,28 @@ pub fn token_to_string(tok: &Token) -> String { token::Question => "?".into_string(), /* Literals */ - token::LitByte(b) => format!("b'{}'", 
b.as_str()), - token::LitChar(c) => format!("'{}'", c.as_str()), - token::LitFloat(c) => c.as_str().into_string(), - token::LitInteger(c) => c.as_str().into_string(), - token::LitStr(s) => format!("\"{}\"", s.as_str()), - token::LitStrRaw(s, n) => format!("r{delim}\"{string}\"{delim}", - delim="#".repeat(n), - string=s.as_str()), - token::LitBinary(v) => format!("b\"{}\"", v.as_str()), - token::LitBinaryRaw(s, n) => format!("br{delim}\"{string}\"{delim}", - delim="#".repeat(n), - string=s.as_str()), + token::Literal(lit, suf) => { + let mut out = match lit { + token::Byte(b) => format!("b'{}'", b.as_str()), + token::Char(c) => format!("'{}'", c.as_str()), + token::Float(c) => c.as_str().into_string(), + token::Integer(c) => c.as_str().into_string(), + token::Str_(s) => format!("\"{}\"", s.as_str()), + token::StrRaw(s, n) => format!("r{delim}\"{string}\"{delim}", + delim="#".repeat(n), + string=s.as_str()), + token::Binary(v) => format!("b\"{}\"", v.as_str()), + token::BinaryRaw(s, n) => format!("br{delim}\"{string}\"{delim}", + delim="#".repeat(n), + string=s.as_str()), + }; + + if let Some(s) = suf { + out.push_str(s.as_str()) + } + + out + } /* Name components */ token::Ident(s, _) => token::get_ident(s).get().into_string(), diff --git a/src/test/compile-fail/bad-lit-suffixes.rs b/src/test/compile-fail/bad-lit-suffixes.rs new file mode 100644 index 0000000000000..e142365a8ca07 --- /dev/null +++ b/src/test/compile-fail/bad-lit-suffixes.rs @@ -0,0 +1,41 @@ +// Copyright 2014 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ + +extern crate + "foo"suffix //~ ERROR extern crate name with a suffix is illegal + as foo; + +extern + "C"suffix //~ ERROR ABI spec with a suffix is illegal + fn foo() {} + +extern + "C"suffix //~ ERROR ABI spec with a suffix is illegal +{} + +fn main() { + ""suffix; //~ ERROR str literal with a suffix is illegal + b""suffix; //~ ERROR binary str literal with a suffix is illegal + r#""#suffix; //~ ERROR str literal with a suffix is illegal + br#""#suffix; //~ ERROR binary str literal with a suffix is illegal + 'a'suffix; //~ ERROR char literal with a suffix is illegal + b'a'suffix; //~ ERROR byte literal with a suffix is illegal + + 1234u1024; //~ ERROR illegal width `1024` for integer literal + 1234i1024; //~ ERROR illegal width `1024` for integer literal + 1234f1024; //~ ERROR illegal width `1024` for float literal + 1234.5f1024; //~ ERROR illegal width `1024` for float literal + + 1234suffix; //~ ERROR illegal suffix `suffix` for numeric literal + 0b101suffix; //~ ERROR illegal suffix `suffix` for numeric literal + 1.0suffix; //~ ERROR illegal suffix `suffix` for numeric literal + 1.0e10suffix; //~ ERROR illegal suffix `suffix` for numeric literal +}