From 5b5638f6863803477d56e200d6a9a208015838c1 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 19 Nov 2014 10:17:40 +1100 Subject: [PATCH 1/7] Switch to an independent enum for `Lit*` subtokens. --- src/grammar/verify.rs | 50 +++++++++++++++-------------- src/librustdoc/html/highlight.rs | 7 ++-- src/libsyntax/ast.rs | 2 +- src/libsyntax/diagnostics/plugin.rs | 2 +- src/libsyntax/ext/quote.rs | 37 ++++++++++----------- src/libsyntax/parse/lexer/mod.rs | 41 +++++++++++------------ src/libsyntax/parse/parser.rs | 28 ++++++++-------- src/libsyntax/parse/token.rs | 42 ++++++++++-------------- src/libsyntax/print/pprust.rs | 24 +++++++------- 9 files changed, 115 insertions(+), 118 deletions(-) diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs index 159a62f011072..84a288ef042d0 100644 --- a/src/grammar/verify.rs +++ b/src/grammar/verify.rs @@ -61,7 +61,7 @@ fn parse_token_list(file: &str) -> HashMap { "SHL" => token::BinOp(token::Shl), "LBRACE" => token::OpenDelim(token::Brace), "RARROW" => token::Rarrow, - "LIT_STR" => token::LitStr(Name(0)), + "LIT_STR" => token::Literal(token::Str_(Name(0))), "DOTDOT" => token::DotDot, "MOD_SEP" => token::ModSep, "DOTDOTDOT" => token::DotDotDot, @@ -71,7 +71,7 @@ fn parse_token_list(file: &str) -> HashMap { "ANDAND" => token::AndAnd, "AT" => token::At, "LBRACKET" => token::OpenDelim(token::Bracket), - "LIT_STR_RAW" => token::LitStrRaw(Name(0), 0), + "LIT_STR_RAW" => token::Literal(token::StrRaw(Name(0), 0)), "RPAREN" => token::CloseDelim(token::Paren), "SLASH" => token::BinOp(token::Slash), "COMMA" => token::Comma, @@ -80,8 +80,8 @@ fn parse_token_list(file: &str) -> HashMap { "TILDE" => token::Tilde, "IDENT" => token::Id(), "PLUS" => token::BinOp(token::Plus), - "LIT_CHAR" => token::LitChar(Name(0)), - "LIT_BYTE" => token::LitByte(Name(0)), + "LIT_CHAR" => token::Literal(token::Char(Name(0))), + "LIT_BYTE" => token::Literal(token::Byte(Name(0))), "EQ" => token::Eq, "RBRACKET" => token::CloseDelim(token::Bracket), "COMMENT" => token::Comment, @@ -95,9 +95,9 @@ fn parse_token_list(file: &str) -> HashMap { "BINOP" => token::BinOp(token::Plus), "POUND" => token::Pound, "OROR" => token::OrOr, - "LIT_INTEGER" => token::LitInteger(Name(0)), + "LIT_INTEGER" => token::Literal(token::Integer(Name(0))), "BINOPEQ" => token::BinOpEq(token::Plus), - "LIT_FLOAT" => token::LitFloat(Name(0)), + "LIT_FLOAT" => token::Literal(token::Float(Name(0))), "WHITESPACE" => token::Whitespace, "UNDERSCORE" => token::Underscore, "MINUS" => token::BinOp(token::Minus), @@ -107,8 +107,8 @@ fn parse_token_list(file: &str) -> HashMap { "OR" => token::BinOp(token::Or), "GT" => token::Gt, "LE" => token::Le, - "LIT_BINARY" => token::LitBinary(Name(0)), - "LIT_BINARY_RAW" => token::LitBinaryRaw(Name(0), 0), + "LIT_BINARY" => token::Literal(token::Binary(Name(0))), + "LIT_BINARY_RAW" => token::Literal(token::BinaryRaw(Name(0), 0)), _ => continue, }; @@ -189,15 +189,17 @@ fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { token::BinOp(..) => token::BinOp(str_to_binop(content)), token::BinOpEq(..) => token::BinOpEq(str_to_binop(content.slice_to( content.len() - 1))), - token::LitStr(..) => token::LitStr(fix(content)), - token::LitStrRaw(..) => token::LitStrRaw(fix(content), count(content)), - token::LitChar(..) => token::LitChar(fixchar(content)), - token::LitByte(..) 
=> token::LitByte(fixchar(content)), + token::Literal(token::Str_(..)) => token::Literal(token::Str_(fix(content))), + token::Literal(token::StrRaw(..)) => token::Literal(token::StrRaw(fix(content), + count(content))), + token::Literal(token::Char(..)) => token::Literal(token::Char(fixchar(content))), + token::Literal(token::Byte(..)) => token::Literal(token::Byte(fixchar(content))), token::DocComment(..) => token::DocComment(nm), - token::LitInteger(..) => token::LitInteger(nm), - token::LitFloat(..) => token::LitFloat(nm), - token::LitBinary(..) => token::LitBinary(nm), - token::LitBinaryRaw(..) => token::LitBinaryRaw(fix(content), count(content)), + token::Literal(token::Integer(..)) => token::Literal(token::Integer(nm)), + token::Literal(token::Float(..)) => token::Literal(token::Float(nm)), + token::Literal(token::Binary(..)) => token::Literal(token::Binary(nm)), + token::Literal(token::BinaryRaw(..)) => token::Literal(token::BinaryRaw(fix(content), + count(content))), token::Ident(..) => token::Ident(ast::Ident { name: nm, ctxt: 0 }, token::ModName), token::Lifetime(..) => token::Lifetime(ast::Ident { name: nm, ctxt: 0 }), @@ -284,14 +286,14 @@ fn main() { ) matches!( - LitByte(..), - LitChar(..), - LitInteger(..), - LitFloat(..), - LitStr(..), - LitStrRaw(..), - LitBinary(..), - LitBinaryRaw(..), + token::Literal(token::Byte(..)), + token::Literal(token::Char(..)), + token::Literal(token::Integer(..)), + token::Literal(token::Float(..)), + token::Literal(token::Str_(..)), + token::Literal(token::StrRaw(..)), + token::Literal(token::Binary(..)), + token::Literal(token::BinaryRaw(..)), Ident(..), Lifetime(..), Interpolated(..), diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs index d445da9d1340d..527ef553d99e5 100644 --- a/src/librustdoc/html/highlight.rs +++ b/src/librustdoc/html/highlight.rs @@ -129,11 +129,12 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader, } // text literals - token::LitByte(..) | token::LitBinary(..) | token::LitBinaryRaw(..) | - token::LitChar(..) | token::LitStr(..) | token::LitStrRaw(..) => "string", + token::Literal(token::Byte(..)) | token::Literal(token::Char(..)) | + token::Literal(token::Binary(..)) | token::Literal(token::BinaryRaw(..)) | + token::Literal(token::Str_(..)) | token::Literal(token::StrRaw(..)) => "string", // number literals - token::LitInteger(..) | token::LitFloat(..) 
=> "number", + token::Literal(token::Integer(..)) | token::Literal(token::Float(..)) => "number", // keywords are also included in the identifier set token::Ident(ident, _is_mod_sep) => { diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 15e14902727f5..2158bdb416c7e 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -838,7 +838,7 @@ impl TokenTree { tts: vec![TtToken(sp, token::Ident(token::str_to_ident("doc"), token::Plain)), TtToken(sp, token::Eq), - TtToken(sp, token::LitStr(name))], + TtToken(sp, token::Literal(token::Str_(name)))], close_span: sp, })) } diff --git a/src/libsyntax/diagnostics/plugin.rs b/src/libsyntax/diagnostics/plugin.rs index d077fbd7bf00f..b928fc778e8bc 100644 --- a/src/libsyntax/diagnostics/plugin.rs +++ b/src/libsyntax/diagnostics/plugin.rs @@ -87,7 +87,7 @@ pub fn expand_register_diagnostic<'cx>(ecx: &'cx mut ExtCtxt, }, [ast::TtToken(_, token::Ident(ref code, _)), ast::TtToken(_, token::Comma), - ast::TtToken(_, token::LitStrRaw(description, _))] => { + ast::TtToken(_, token::Literal(token::StrRaw(description, _)))] => { (code, Some(description)) } _ => unreachable!() diff --git a/src/libsyntax/ext/quote.rs b/src/libsyntax/ext/quote.rs index ec69175707746..ec51ce00605fd 100644 --- a/src/libsyntax/ext/quote.rs +++ b/src/libsyntax/ext/quote.rs @@ -542,6 +542,13 @@ fn mk_delim(cx: &ExtCtxt, sp: Span, delim: token::DelimToken) -> P { #[allow(non_upper_case_globals)] fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> P { + macro_rules! mk_lit { + ($name: expr, $($args: expr),*) => {{ + let inner = cx.expr_call(sp, mk_token_path(cx, sp, $name), vec![$($args),*]); + + cx.expr_call(sp, mk_token_path(cx, sp, "Literal"), vec![inner]) + }} + } match *tok { token::BinOp(binop) => { return cx.expr_call(sp, mk_token_path(cx, sp, "BinOp"), vec!(mk_binop(cx, sp, binop))); @@ -560,38 +567,32 @@ fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> P { vec![mk_delim(cx, sp, delim)]); } - token::LitByte(i) => { + token::Literal(token::Byte(i)) => { let e_byte = mk_name(cx, sp, i.ident()); - - return cx.expr_call(sp, mk_token_path(cx, sp, "LitByte"), vec!(e_byte)); + return mk_lit!("Byte", e_byte); } - token::LitChar(i) => { + token::Literal(token::Char(i)) => { let e_char = mk_name(cx, sp, i.ident()); - - return cx.expr_call(sp, mk_token_path(cx, sp, "LitChar"), vec!(e_char)); + return mk_lit!("Char", e_char); } - token::LitInteger(i) => { + token::Literal(token::Integer(i)) => { let e_int = mk_name(cx, sp, i.ident()); - return cx.expr_call(sp, mk_token_path(cx, sp, "LitInteger"), vec!(e_int)); + return mk_lit!("Integer", e_int); } - token::LitFloat(fident) => { + token::Literal(token::Float(fident)) => { let e_fident = mk_name(cx, sp, fident.ident()); - return cx.expr_call(sp, mk_token_path(cx, sp, "LitFloat"), vec!(e_fident)); + return mk_lit!("Float", e_fident); } - token::LitStr(ident) => { - return cx.expr_call(sp, - mk_token_path(cx, sp, "LitStr"), - vec!(mk_name(cx, sp, ident.ident()))); + token::Literal(token::Str_(ident)) => { + return mk_lit!("Str_", mk_name(cx, sp, ident.ident())) } - token::LitStrRaw(ident, n) => { - return cx.expr_call(sp, - mk_token_path(cx, sp, "LitStrRaw"), - vec!(mk_name(cx, sp, ident.ident()), cx.expr_uint(sp, n))); + token::Literal(token::StrRaw(ident, n)) => { + return mk_lit!("StrRaw", mk_name(cx, sp, ident.ident()), cx.expr_uint(sp, n)) } token::Ident(ident, style) => { diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 01a66243a965f..b7598c7c42820 100644 --- 
a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -655,17 +655,17 @@ impl<'a> StringReader<'a> { } 'u' | 'i' => { self.scan_int_suffix(); - return token::LitInteger(self.name_from(start_bpos)); + return token::Literal(token::Integer(self.name_from(start_bpos))); }, 'f' => { let last_pos = self.last_pos; self.scan_float_suffix(); self.check_float_base(start_bpos, last_pos, base); - return token::LitFloat(self.name_from(start_bpos)); + return token::Literal(token::Float(self.name_from(start_bpos))); } _ => { // just a 0 - return token::LitInteger(self.name_from(start_bpos)); + return token::Literal(token::Integer(self.name_from(start_bpos))); } } } else if c.is_digit_radix(10) { @@ -678,7 +678,7 @@ impl<'a> StringReader<'a> { self.err_span_(start_bpos, self.last_pos, "no valid digits found for number"); // eat any suffix self.scan_int_suffix(); - return token::LitInteger(token::intern("0")); + return token::Literal(token::Integer(token::intern("0"))); } // might be a float, but don't be greedy if this is actually an @@ -696,13 +696,13 @@ impl<'a> StringReader<'a> { } let last_pos = self.last_pos; self.check_float_base(start_bpos, last_pos, base); - return token::LitFloat(self.name_from(start_bpos)); + return token::Literal(token::Float(self.name_from(start_bpos))); } else if self.curr_is('f') { // or it might be an integer literal suffixed as a float self.scan_float_suffix(); let last_pos = self.last_pos; self.check_float_base(start_bpos, last_pos, base); - return token::LitFloat(self.name_from(start_bpos)); + return token::Literal(token::Float(self.name_from(start_bpos))); } else { // it might be a float if it has an exponent if self.curr_is('e') || self.curr_is('E') { @@ -710,11 +710,11 @@ impl<'a> StringReader<'a> { self.scan_float_suffix(); let last_pos = self.last_pos; self.check_float_base(start_bpos, last_pos, base); - return token::LitFloat(self.name_from(start_bpos)); + return token::Literal(token::Float(self.name_from(start_bpos))); } // but we certainly have an integer! 
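
// A minimal, self-contained sketch (toy types, not rustc's own definitions)
// of the shape change this patch makes: the eight `Lit*` token variants
// collapse into a single `Literal` variant wrapping the new `token::Lit`
// enum, so "any literal" becomes one match arm instead of eight.
enum SketchLit { Char(String), Integer(String), Float(String) } // subset only
enum SketchToken { Literal(SketchLit), Eq, Comma }              // subset only

fn sketch_is_lit(tok: &SketchToken) -> bool {
    // before this patch: LitChar(..) | LitInteger(..) | ... (eight arms)
    match *tok {
        SketchToken::Literal(_) => true,
        _ => false,
    }
}
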
self.scan_int_suffix(); - return token::LitInteger(self.name_from(start_bpos)); + return token::Literal(token::Integer(self.name_from(start_bpos))); } } @@ -1126,7 +1126,7 @@ impl<'a> StringReader<'a> { } let id = if valid { self.name_from(start) } else { token::intern("0") }; self.bump(); // advance curr past token - return token::LitChar(id); + return token::Literal(token::Char(id)); } 'b' => { self.bump(); @@ -1157,7 +1157,7 @@ impl<'a> StringReader<'a> { let id = if valid { self.name_from(start_bpos + BytePos(1)) } else { token::intern("??") }; self.bump(); - return token::LitStr(id); + return token::Literal(token::Str_(id)); } 'r' => { let start_bpos = self.last_pos; @@ -1224,7 +1224,7 @@ impl<'a> StringReader<'a> { } else { token::intern("??") }; - return token::LitStrRaw(id, hash_count); + return token::Literal(token::StrRaw(id, hash_count)); } '-' => { if self.nextch_is('>') { @@ -1314,7 +1314,7 @@ impl<'a> StringReader<'a> { let id = if valid { self.name_from(start) } else { token::intern("??") }; self.bump(); // advance curr past token - return token::LitByte(id); + return token::Literal(token::Byte(id)); } fn scan_byte_string(&mut self) -> token::Token { @@ -1336,7 +1336,7 @@ impl<'a> StringReader<'a> { } let id = if valid { self.name_from(start) } else { token::intern("??") }; self.bump(); - return token::LitBinary(id); + return token::Literal(token::Binary(id)); } fn scan_raw_byte_string(&mut self) -> token::Token { @@ -1387,8 +1387,9 @@ impl<'a> StringReader<'a> { self.bump(); } self.bump(); - return token::LitBinaryRaw(self.name_from_to(content_start_bpos, content_end_bpos), - hash_count); + return token::Literal(token::BinaryRaw(self.name_from_to(content_start_bpos, + content_end_bpos), + hash_count)); } } @@ -1535,17 +1536,17 @@ mod test { #[test] fn character_a() { assert_eq!(setup(&mk_sh(), "'a'".to_string()).next_token().tok, - token::LitChar(token::intern("a"))); + token::Literal(token::Char(token::intern("a")))); } #[test] fn character_space() { assert_eq!(setup(&mk_sh(), "' '".to_string()).next_token().tok, - token::LitChar(token::intern(" "))); + token::Literal(token::Char(token::intern(" ")))); } #[test] fn character_escaped() { assert_eq!(setup(&mk_sh(), "'\\n'".to_string()).next_token().tok, - token::LitChar(token::intern("\\n"))); + token::Literal(token::Char(token::intern("\\n")))); } #[test] fn lifetime_name() { @@ -1557,7 +1558,7 @@ mod test { assert_eq!(setup(&mk_sh(), "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token() .tok, - token::LitStrRaw(token::intern("\"#a\\b\x00c\""), 3)); + token::Literal(token::StrRaw(token::intern("\"#a\\b\x00c\""), 3))); } #[test] fn line_doc_comments() { @@ -1573,7 +1574,7 @@ mod test { token::Comment => { }, _ => panic!("expected a comment!") } - assert_eq!(lexer.next_token().tok, token::LitChar(token::intern("a"))); + assert_eq!(lexer.next_token().tok, token::Literal(token::Char(token::intern("a")))); } } diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 40c4ac9f8c044..4edcb182e5385 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -1640,23 +1640,23 @@ impl<'a> Parser<'a> { /// Matches token_lit = LIT_INTEGER | ... 
pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ { match *tok { - token::LitByte(i) => LitByte(parse::byte_lit(i.as_str()).val0()), - token::LitChar(i) => LitChar(parse::char_lit(i.as_str()).val0()), - token::LitInteger(s) => parse::integer_lit(s.as_str(), + token::Literal(token::Byte(i)) => LitByte(parse::byte_lit(i.as_str()).val0()), + token::Literal(token::Char(i)) => LitChar(parse::char_lit(i.as_str()).val0()), + token::Literal(token::Integer(s)) => parse::integer_lit(s.as_str(), &self.sess.span_diagnostic, self.last_span), - token::LitFloat(s) => parse::float_lit(s.as_str()), - token::LitStr(s) => { + token::Literal(token::Float(s)) => parse::float_lit(s.as_str()), + token::Literal(token::Str_(s)) => { LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()), ast::CookedStr) } - token::LitStrRaw(s, n) => { + token::Literal(token::StrRaw(s, n)) => { LitStr(token::intern_and_get_ident(parse::raw_str_lit(s.as_str()).as_slice()), ast::RawStr(n)) } - token::LitBinary(i) => + token::Literal(token::Binary(i)) => LitBinary(parse::binary_lit(i.as_str())), - token::LitBinaryRaw(i, _) => + token::Literal(token::BinaryRaw(i, _)) => LitBinary(Rc::new(i.as_str().as_bytes().iter().map(|&x| x).collect())), _ => { self.unexpected_last(tok); } } @@ -2424,7 +2424,7 @@ impl<'a> Parser<'a> { } } } - token::LitInteger(n) => { + token::Literal(token::Integer(n)) => { let index = n.as_str(); let dot = self.last_span.hi; hi = self.span.hi; @@ -2449,7 +2449,7 @@ impl<'a> Parser<'a> { } } } - token::LitFloat(n) => { + token::Literal(token::Float(n)) => { self.bump(); let last_span = self.last_span; let fstr = n.as_str(); @@ -5085,7 +5085,7 @@ impl<'a> Parser<'a> { self.expect(&token::Semi); (path, the_ident) }, - token::LitStr(..) | token::LitStrRaw(..) => { + token::Literal(token::Str_(..)) | token::Literal(token::StrRaw(..)) => { let path = self.parse_str(); self.expect_keyword(keywords::As); let the_ident = self.parse_ident(); @@ -5267,7 +5267,7 @@ impl<'a> Parser<'a> { /// the `extern` keyword, if one is found. 
fn parse_opt_abi(&mut self) -> Option { match self.token { - token::LitStr(s) | token::LitStrRaw(s, _) => { + token::Literal(token::Str_(s)) | token::Literal(token::StrRaw(s, _)) => { self.bump(); let the_string = s.as_str(); match abi::lookup(the_string) { @@ -5904,8 +5904,8 @@ impl<'a> Parser<'a> { pub fn parse_optional_str(&mut self) -> Option<(InternedString, ast::StrStyle)> { let (s, style) = match self.token { - token::LitStr(s) => (self.id_to_interned_str(s.ident()), ast::CookedStr), - token::LitStrRaw(s, n) => { + token::Literal(token::Str_(s)) => (self.id_to_interned_str(s.ident()), ast::CookedStr), + token::Literal(token::StrRaw(s, n)) => { (self.id_to_interned_str(s.ident()), ast::RawStr(n)) } _ => return None diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs index 298328d73efb0..bfa6ca798b233 100644 --- a/src/libsyntax/parse/token.rs +++ b/src/libsyntax/parse/token.rs @@ -12,6 +12,7 @@ pub use self::BinOpToken::*; pub use self::Nonterminal::*; pub use self::DelimToken::*; pub use self::IdentStyle::*; +pub use self::Lit::*; pub use self::Token::*; use ast; @@ -59,6 +60,18 @@ pub enum IdentStyle { Plain, } +#[deriving(Clone, Encodable, Decodable, PartialEq, Eq, Hash, Show)] +pub enum Lit { + Byte(ast::Name), + Char(ast::Name), + Integer(ast::Name), + Float(ast::Name), + Str_(ast::Name), + StrRaw(ast::Name, uint), /* raw str delimited by n hash symbols */ + Binary(ast::Name), + BinaryRaw(ast::Name, uint), /* raw binary str delimited by n hash symbols */ +} + #[allow(non_camel_case_types)] #[deriving(Clone, Encodable, Decodable, PartialEq, Eq, Hash, Show)] pub enum Token { @@ -98,14 +111,7 @@ pub enum Token { CloseDelim(DelimToken), /* Literals */ - LitByte(ast::Name), - LitChar(ast::Name), - LitInteger(ast::Name), - LitFloat(ast::Name), - LitStr(ast::Name), - LitStrRaw(ast::Name, uint), /* raw str delimited by n hash symbols */ - LitBinary(ast::Name), - LitBinaryRaw(ast::Name, uint), /* raw binary str delimited by n hash symbols */ + Literal(Lit), /* Name components */ Ident(ast::Ident, IdentStyle), @@ -145,14 +151,7 @@ impl Token { Ident(_, _) => true, Underscore => true, Tilde => true, - LitByte(_) => true, - LitChar(_) => true, - LitInteger(_) => true, - LitFloat(_) => true, - LitStr(_) => true, - LitStrRaw(_, _) => true, - LitBinary(_) => true, - LitBinaryRaw(_, _) => true, + Literal(_) => true, Pound => true, At => true, Not => true, @@ -173,15 +172,8 @@ impl Token { /// Returns `true` if the token is any literal pub fn is_lit(&self) -> bool { match *self { - LitByte(_) => true, - LitChar(_) => true, - LitInteger(_) => true, - LitFloat(_) => true, - LitStr(_) => true, - LitStrRaw(_, _) => true, - LitBinary(_) => true, - LitBinaryRaw(_, _) => true, - _ => false, + Literal(_) => true, + _ => false, } } diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs index e6e0c33a42dbd..7997c1ba4efcf 100644 --- a/src/libsyntax/print/pprust.rs +++ b/src/libsyntax/print/pprust.rs @@ -236,18 +236,18 @@ pub fn token_to_string(tok: &Token) -> String { token::Question => "?".into_string(), /* Literals */ - token::LitByte(b) => format!("b'{}'", b.as_str()), - token::LitChar(c) => format!("'{}'", c.as_str()), - token::LitFloat(c) => c.as_str().into_string(), - token::LitInteger(c) => c.as_str().into_string(), - token::LitStr(s) => format!("\"{}\"", s.as_str()), - token::LitStrRaw(s, n) => format!("r{delim}\"{string}\"{delim}", - delim="#".repeat(n), - string=s.as_str()), - token::LitBinary(v) => format!("b\"{}\"", v.as_str()), - token::LitBinaryRaw(s, 
n) => format!("br{delim}\"{string}\"{delim}", - delim="#".repeat(n), - string=s.as_str()), + token::Literal(token::Byte(b)) => format!("b'{}'", b.as_str()), + token::Literal(token::Char(c)) => format!("'{}'", c.as_str()), + token::Literal(token::Float(c)) => c.as_str().into_string(), + token::Literal(token::Integer(c)) => c.as_str().into_string(), + token::Literal(token::Str_(s)) => format!("\"{}\"", s.as_str()), + token::Literal(token::StrRaw(s, n)) => format!("r{delim}\"{string}\"{delim}", + delim="#".repeat(n), + string=s.as_str()), + token::Literal(token::Binary(v)) => format!("b\"{}\"", v.as_str()), + token::Literal(token::BinaryRaw(s, n)) => format!("br{delim}\"{string}\"{delim}", + delim="#".repeat(n), + string=s.as_str()), /* Name components */ token::Ident(s, _) => token::get_ident(s).get().into_string(), From 9d20a46799178df9d2fb28dfec95ba55cbfb7f9c Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 19 Nov 2014 12:52:44 +1100 Subject: [PATCH 2/7] Update src/grammar for language changes. --- src/grammar/verify.rs | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src/grammar/verify.rs b/src/grammar/verify.rs index 84a288ef042d0..e3ff20f7874bf 100644 --- a/src/grammar/verify.rs +++ b/src/grammar/verify.rs @@ -26,21 +26,21 @@ use std::io::File; use syntax::parse; use syntax::parse::lexer; -use rustc::driver::{session, config}; +use rustc::session::{mod, config}; use syntax::ast; use syntax::ast::Name; use syntax::parse::token; use syntax::parse::lexer::TokenAndSpan; -fn parse_token_list(file: &str) -> HashMap { - fn id() -> Token { +fn parse_token_list(file: &str) -> HashMap { + fn id() -> token::Token { token::Ident(ast::Ident { name: Name(0), ctxt: 0, }, token::Plain) } let mut res = HashMap::new(); - res.insert("-1".to_string(), EOF); + res.insert("-1".to_string(), token::Eof); for line in file.split('\n') { let eq = match line.trim().rfind('=') { @@ -60,7 +60,7 @@ fn parse_token_list(file: &str) -> HashMap { "INT_SUFFIX" => id(), "SHL" => token::BinOp(token::Shl), "LBRACE" => token::OpenDelim(token::Brace), - "RARROW" => token::Rarrow, + "RARROW" => token::RArrow, "LIT_STR" => token::Literal(token::Str_(Name(0))), "DOTDOT" => token::DotDot, "MOD_SEP" => token::ModSep, @@ -78,7 +78,7 @@ fn parse_token_list(file: &str) -> HashMap { "LIFETIME" => token::Lifetime(ast::Ident { name: Name(0), ctxt: 0 }), "CARET" => token::BinOp(token::Caret), "TILDE" => token::Tilde, - "IDENT" => token::Id(), + "IDENT" => id(), "PLUS" => token::BinOp(token::Plus), "LIT_CHAR" => token::Literal(token::Char(Name(0))), "LIT_BYTE" => token::Literal(token::Byte(Name(0))), @@ -119,7 +119,7 @@ fn parse_token_list(file: &str) -> HashMap { res } -fn str_to_binop(s: &str) -> BinOpToken { +fn str_to_binop(s: &str) -> token::BinOpToken { match s { "+" => token::Plus, "/" => token::Slash, @@ -167,7 +167,7 @@ fn count(lit: &str) -> uint { lit.chars().take_while(|c| *c == '#').count() } -fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { +fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { let re = regex!( r"\[@(?P\d+),(?P\d+):(?P\d+)='(?P.+?)',<(?P-?\d+)>,\d+:\d+]" ); @@ -178,7 +178,7 @@ fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { let toknum = m.name("toknum"); let content = m.name("content"); - let proto_tok = tokens.get(&toknum).expect(format!("didn't find token {} in the map", + let proto_tok = tokens.get(toknum).expect(format!("didn't find token {} in the map", toknum).as_slice()); let nm = 
parse::token::intern(content); @@ -206,7 +206,8 @@ fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { ref t => t.clone() }; - let offset = if real_tok == EOF { + let offset = if real_tok == token::Eof + { 1 } else { 0 @@ -224,7 +225,7 @@ fn parse_antlr_token(s: &str, tokens: &HashMap) -> TokenAndSpan { } } -fn tok_cmp(a: &Token, b: &Token) -> bool { +fn tok_cmp(a: &token::Token, b: &token::Token) -> bool { match a { &token::Ident(id, _) => match b { &token::Ident(id2, _) => id == id2, @@ -242,17 +243,17 @@ fn main() { let args = std::os::args(); - let mut token_file = File::open(&Path::new(args.get(2).as_slice())); + let mut token_file = File::open(&Path::new(args[2].as_slice())); let token_map = parse_token_list(token_file.read_to_string().unwrap().as_slice()); let mut stdin = std::io::stdin(); let mut antlr_tokens = stdin.lines().map(|l| parse_antlr_token(l.unwrap().as_slice().trim(), &token_map)); - let code = File::open(&Path::new(args.get(1).as_slice())).unwrap().read_to_string().unwrap(); + let code = File::open(&Path::new(args[1].as_slice())).unwrap().read_to_string().unwrap(); let options = config::basic_options(); let session = session::build_session(options, None, - syntax::diagnostics::registry::Registry::new([])); + syntax::diagnostics::registry::Registry::new(&[])); let filemap = parse::string_to_filemap(&session.parse_sess, code, String::from_str("")); @@ -260,7 +261,7 @@ fn main() { for antlr_tok in antlr_tokens { let rustc_tok = next(&mut lexer); - if rustc_tok.tok == EOF && antlr_tok.tok == EOF { + if rustc_tok.tok == token::Eof && antlr_tok.tok == token::Eof { continue } @@ -294,11 +295,11 @@ fn main() { token::Literal(token::StrRaw(..)), token::Literal(token::Binary(..)), token::Literal(token::BinaryRaw(..)), - Ident(..), - Lifetime(..), - Interpolated(..), - DocComment(..), - Shebang(..) + token::Ident(..), + token::Lifetime(..), + token::Interpolated(..), + token::DocComment(..), + token::Shebang(..) ); } } From ff0278bc152c5e3072434615cf9861e98a160e71 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 19 Nov 2014 13:11:24 +1100 Subject: [PATCH 3/7] Update makefiles to ensure src/grammar/verify.rs is built. It's not run, but this ensures that the code at least doesn't go out of date. --- mk/grammar.mk | 16 ++++++++++++---- mk/tests.mk | 2 +- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/mk/grammar.mk b/mk/grammar.mk index 12190fb034854..a9f45907b8110 100644 --- a/mk/grammar.mk +++ b/mk/grammar.mk @@ -30,17 +30,25 @@ endef $(BG): $(Q)mkdir -p $(BG) -$(BG)RustLexer.class: $(SG)RustLexer.g4 +$(BG)RustLexer.class: $(BG) $(SG)RustLexer.g4 $(Q)$(CFG_ANTLR4) -o $(B)grammar $(SG)RustLexer.g4 $(Q)$(CFG_JAVAC) -d $(BG) $(BG)RustLexer.java -$(BG)verify: $(SG)verify.rs rustc-stage2-H-$(CFG_BUILD) $(LD)stamp.regex_macros $(LD)stamp.rustc - $(Q)$(RUSTC) -O --out-dir $(BG) -L $(L) $(SG)verify.rs +check-build-lexer-verifier: $(BG)verify + +ifeq ($(NO_REBUILD),) +VERIFY_DEPS := rustc-stage2-H-$(CFG_BUILD) $(LD)stamp.regex_macros $(LD)stamp.rustc +else +VERIFY_DEPS := +endif + +$(BG)verify: $(BG) $(SG)verify.rs $(VERIFY_DEPS) + $(Q)$(RUSTC) --out-dir $(BG) -L $(L) $(SG)verify.rs ifdef CFG_JAVAC ifdef CFG_ANTLR4 ifdef CFG_GRUN -check-lexer: $(BG) $(BG)RustLexer.class $(BG)verify +check-lexer: $(BG) $(BG)RustLexer.class check-build-lexer-verifier $(info Verifying libsyntax against the reference lexer ...) 
$(Q)$(SG)check.sh $(S) "$(BG)" \
 "$(CFG_GRUN)" "$(BG)verify" "$(BG)RustLexer.tokens"

diff --git a/mk/tests.mk b/mk/tests.mk
index 4433d780dedf6..63a34e0f01007 100644
--- a/mk/tests.mk
+++ b/mk/tests.mk
@@ -199,7 +199,7 @@ check-docs: cleantestlibs cleantmptestlogs check-stage2-docs
 # Some less critical tests that are not prone to breakage.
 # Not run as part of the normal test suite, but tested by bors on checkin.
-check-secondary: check-build-compiletest check-lexer check-pretty
+check-secondary: check-build-compiletest check-build-lexer-verifier check-lexer check-pretty

 # check + check-secondary.
 #

From 6679595853705ca11f64984a055be60233321a4a Mon Sep 17 00:00:00 2001
From: Huon Wilson
Date: Wed, 19 Nov 2014 15:48:38 +1100
Subject: [PATCH 4/7] Parse and store suffixes on literals.

This adds an optional suffix at the end of a literal token:
`"foo"bar`. An actual use of a suffix in an expression (or other
literal that the compiler reads) is rejected in the parser.

This doesn't switch the handling of numbers to this system, and
doesn't outlaw illegal suffixes for them yet.
---
 src/librustdoc/html/highlight.rs | 18 ++--
 src/libsyntax/ast.rs | 2 +-
 src/libsyntax/diagnostics/plugin.rs | 2 +-
 src/libsyntax/ext/quote.rs | 33 ++++---
 src/libsyntax/parse/lexer/mod.rs | 112 ++++++++++++++------
 src/libsyntax/parse/parser.rs | 109 +++++++++++++------
 src/libsyntax/parse/token.rs | 19 +++-
 src/libsyntax/print/pprust.rs | 34 ++++---
 src/test/compile-fail/bad-lit-suffixes.rs | 36 +++++++
 9 files changed, 268 insertions(+), 97 deletions(-)
 create mode 100644 src/test/compile-fail/bad-lit-suffixes.rs

diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index 527ef553d99e5..111650f565cf6 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -128,13 +128,17 @@ fn doit(sess: &parse::ParseSess, mut lexer: lexer::StringReader,
 }
 }

- // text literals
- token::Literal(token::Byte(..)) | token::Literal(token::Char(..)) |
- token::Literal(token::Binary(..)) | token::Literal(token::BinaryRaw(..)) |
- token::Literal(token::Str_(..)) | token::Literal(token::StrRaw(..)) => "string",
-
- // number literals
- token::Literal(token::Integer(..)) | token::Literal(token::Float(..)) => "number",
+ token::Literal(lit, _suf) => {
+ match lit {
+ // text literals
+ token::Byte(..) | token::Char(..) |
+ token::Binary(..) | token::BinaryRaw(..) |
+ token::Str_(..) | token::StrRaw(..) => "string",
+
+ // number literals
+ token::Integer(..) | token::Float(..)
=> "number", + } + } // keywords are also included in the identifier set token::Ident(ident, _is_mod_sep) => { diff --git a/src/libsyntax/ast.rs b/src/libsyntax/ast.rs index 2158bdb416c7e..7b16c08785926 100644 --- a/src/libsyntax/ast.rs +++ b/src/libsyntax/ast.rs @@ -838,7 +838,7 @@ impl TokenTree { tts: vec![TtToken(sp, token::Ident(token::str_to_ident("doc"), token::Plain)), TtToken(sp, token::Eq), - TtToken(sp, token::Literal(token::Str_(name)))], + TtToken(sp, token::Literal(token::Str_(name), None))], close_span: sp, })) } diff --git a/src/libsyntax/diagnostics/plugin.rs b/src/libsyntax/diagnostics/plugin.rs index b928fc778e8bc..281bde3129aba 100644 --- a/src/libsyntax/diagnostics/plugin.rs +++ b/src/libsyntax/diagnostics/plugin.rs @@ -87,7 +87,7 @@ pub fn expand_register_diagnostic<'cx>(ecx: &'cx mut ExtCtxt, }, [ast::TtToken(_, token::Ident(ref code, _)), ast::TtToken(_, token::Comma), - ast::TtToken(_, token::Literal(token::StrRaw(description, _)))] => { + ast::TtToken(_, token::Literal(token::StrRaw(description, _), None))] => { (code, Some(description)) } _ => unreachable!() diff --git a/src/libsyntax/ext/quote.rs b/src/libsyntax/ext/quote.rs index ec51ce00605fd..eaa3632cf499e 100644 --- a/src/libsyntax/ext/quote.rs +++ b/src/libsyntax/ext/quote.rs @@ -543,10 +543,13 @@ fn mk_delim(cx: &ExtCtxt, sp: Span, delim: token::DelimToken) -> P { #[allow(non_upper_case_globals)] fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> P { macro_rules! mk_lit { - ($name: expr, $($args: expr),*) => {{ + ($name: expr, $suffix: expr, $($args: expr),*) => {{ let inner = cx.expr_call(sp, mk_token_path(cx, sp, $name), vec![$($args),*]); - - cx.expr_call(sp, mk_token_path(cx, sp, "Literal"), vec![inner]) + let suffix = match $suffix { + Some(name) => cx.expr_some(sp, mk_name(cx, sp, ast::Ident::new(name))), + None => cx.expr_none(sp) + }; + cx.expr_call(sp, mk_token_path(cx, sp, "Literal"), vec![inner, suffix]) }} } match *tok { @@ -567,32 +570,32 @@ fn mk_token(cx: &ExtCtxt, sp: Span, tok: &token::Token) -> P { vec![mk_delim(cx, sp, delim)]); } - token::Literal(token::Byte(i)) => { + token::Literal(token::Byte(i), suf) => { let e_byte = mk_name(cx, sp, i.ident()); - return mk_lit!("Byte", e_byte); + return mk_lit!("Byte", suf, e_byte); } - token::Literal(token::Char(i)) => { + token::Literal(token::Char(i), suf) => { let e_char = mk_name(cx, sp, i.ident()); - return mk_lit!("Char", e_char); + return mk_lit!("Char", suf, e_char); } - token::Literal(token::Integer(i)) => { + token::Literal(token::Integer(i), suf) => { let e_int = mk_name(cx, sp, i.ident()); - return mk_lit!("Integer", e_int); + return mk_lit!("Integer", suf, e_int); } - token::Literal(token::Float(fident)) => { + token::Literal(token::Float(fident), suf) => { let e_fident = mk_name(cx, sp, fident.ident()); - return mk_lit!("Float", e_fident); + return mk_lit!("Float", suf, e_fident); } - token::Literal(token::Str_(ident)) => { - return mk_lit!("Str_", mk_name(cx, sp, ident.ident())) + token::Literal(token::Str_(ident), suf) => { + return mk_lit!("Str_", suf, mk_name(cx, sp, ident.ident())) } - token::Literal(token::StrRaw(ident, n)) => { - return mk_lit!("StrRaw", mk_name(cx, sp, ident.ident()), cx.expr_uint(sp, n)) + token::Literal(token::StrRaw(ident, n), suf) => { + return mk_lit!("StrRaw", suf, mk_name(cx, sp, ident.ident()), cx.expr_uint(sp, n)) } token::Ident(ident, style) => { diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index b7598c7c42820..55c4335941dbc 100644 --- 
a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -369,6 +369,25 @@ impl<'a> StringReader<'a> { self.nextnextch() == Some(c) } + /// Eats *, if possible. + fn scan_optional_raw_name(&mut self) -> Option { + if !ident_start(self.curr) { + return None + } + let start = self.last_pos; + while ident_continue(self.curr) { + self.bump(); + } + + self.with_str_from(start, |string| { + if string == "_" { + None + } else { + Some(token::intern(string)) + } + }) + } + /// PRECONDITION: self.curr is not whitespace /// Eats any kind of comment. fn scan_comment(&mut self) -> Option { @@ -638,7 +657,7 @@ impl<'a> StringReader<'a> { } /// Lex a LIT_INTEGER or a LIT_FLOAT - fn scan_number(&mut self, c: char) -> token::Token { + fn scan_number(&mut self, c: char) -> token::Lit { let mut num_digits; let mut base = 10; let start_bpos = self.last_pos; @@ -655,17 +674,17 @@ impl<'a> StringReader<'a> { } 'u' | 'i' => { self.scan_int_suffix(); - return token::Literal(token::Integer(self.name_from(start_bpos))); + return token::Integer(self.name_from(start_bpos)); }, 'f' => { let last_pos = self.last_pos; self.scan_float_suffix(); self.check_float_base(start_bpos, last_pos, base); - return token::Literal(token::Float(self.name_from(start_bpos))); + return token::Float(self.name_from(start_bpos)); } _ => { // just a 0 - return token::Literal(token::Integer(self.name_from(start_bpos))); + return token::Integer(self.name_from(start_bpos)); } } } else if c.is_digit_radix(10) { @@ -678,7 +697,7 @@ impl<'a> StringReader<'a> { self.err_span_(start_bpos, self.last_pos, "no valid digits found for number"); // eat any suffix self.scan_int_suffix(); - return token::Literal(token::Integer(token::intern("0"))); + return token::Integer(token::intern("0")); } // might be a float, but don't be greedy if this is actually an @@ -696,13 +715,13 @@ impl<'a> StringReader<'a> { } let last_pos = self.last_pos; self.check_float_base(start_bpos, last_pos, base); - return token::Literal(token::Float(self.name_from(start_bpos))); + return token::Float(self.name_from(start_bpos)); } else if self.curr_is('f') { // or it might be an integer literal suffixed as a float self.scan_float_suffix(); let last_pos = self.last_pos; self.check_float_base(start_bpos, last_pos, base); - return token::Literal(token::Float(self.name_from(start_bpos))); + return token::Float(self.name_from(start_bpos)); } else { // it might be a float if it has an exponent if self.curr_is('e') || self.curr_is('E') { @@ -710,11 +729,11 @@ impl<'a> StringReader<'a> { self.scan_float_suffix(); let last_pos = self.last_pos; self.check_float_base(start_bpos, last_pos, base); - return token::Literal(token::Float(self.name_from(start_bpos))); + return token::Float(self.name_from(start_bpos)); } // but we certainly have an integer! 
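
// A simplified sketch (plain strings, not the real StringReader API) of the
// suffix scanning this patch introduces: once the literal body has been
// consumed, an immediately adjacent identifier-like tail is taken as the
// suffix, as `scan_optional_raw_name` does above.
fn sketch_split_int_suffix(lit: &str) -> (&str, Option<&str>) {
    // scan past the digits of the body; an identifier-like tail is the suffix
    let body_end = lit.find(|c: char| !c.is_numeric()).unwrap_or(lit.len());
    let (body, tail) = lit.split_at(body_end);
    let is_suffix = tail.chars().next().map_or(false, |c| c.is_alphabetic() || c == '_');
    (body, if is_suffix { Some(tail) } else { None })
}
// e.g. sketch_split_int_suffix("1234suffix") == ("1234", Some("suffix")),
// while a separated "1234 suffix" keeps the suffix as None.
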
self.scan_int_suffix(); - return token::Literal(token::Integer(self.name_from(start_bpos))); + return token::Integer(self.name_from(start_bpos)); } } @@ -967,7 +986,9 @@ impl<'a> StringReader<'a> { } if is_dec_digit(c) { - return self.scan_number(c.unwrap()); + let num = self.scan_number(c.unwrap()); + let suffix = self.scan_optional_raw_name(); + return token::Literal(num, suffix) } if self.read_embedded_ident { @@ -1126,17 +1147,19 @@ impl<'a> StringReader<'a> { } let id = if valid { self.name_from(start) } else { token::intern("0") }; self.bump(); // advance curr past token - return token::Literal(token::Char(id)); + let suffix = self.scan_optional_raw_name(); + return token::Literal(token::Char(id), suffix); } 'b' => { self.bump(); - return match self.curr { + let lit = match self.curr { Some('\'') => self.scan_byte(), Some('"') => self.scan_byte_string(), Some('r') => self.scan_raw_byte_string(), _ => unreachable!() // Should have been a token::Ident above. }; - + let suffix = self.scan_optional_raw_name(); + return token::Literal(lit, suffix); } '"' => { let start_bpos = self.last_pos; @@ -1157,7 +1180,8 @@ impl<'a> StringReader<'a> { let id = if valid { self.name_from(start_bpos + BytePos(1)) } else { token::intern("??") }; self.bump(); - return token::Literal(token::Str_(id)); + let suffix = self.scan_optional_raw_name(); + return token::Literal(token::Str_(id), suffix); } 'r' => { let start_bpos = self.last_pos; @@ -1224,7 +1248,8 @@ impl<'a> StringReader<'a> { } else { token::intern("??") }; - return token::Literal(token::StrRaw(id, hash_count)); + let suffix = self.scan_optional_raw_name(); + return token::Literal(token::StrRaw(id, hash_count), suffix); } '-' => { if self.nextch_is('>') { @@ -1293,7 +1318,7 @@ impl<'a> StringReader<'a> { || (self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('[')) } - fn scan_byte(&mut self) -> token::Token { + fn scan_byte(&mut self) -> token::Lit { self.bump(); let start = self.last_pos; @@ -1314,10 +1339,10 @@ impl<'a> StringReader<'a> { let id = if valid { self.name_from(start) } else { token::intern("??") }; self.bump(); // advance curr past token - return token::Literal(token::Byte(id)); + return token::Byte(id); } - fn scan_byte_string(&mut self) -> token::Token { + fn scan_byte_string(&mut self) -> token::Lit { self.bump(); let start = self.last_pos; let mut valid = true; @@ -1336,10 +1361,10 @@ impl<'a> StringReader<'a> { } let id = if valid { self.name_from(start) } else { token::intern("??") }; self.bump(); - return token::Literal(token::Binary(id)); + return token::Binary(id); } - fn scan_raw_byte_string(&mut self) -> token::Token { + fn scan_raw_byte_string(&mut self) -> token::Lit { let start_bpos = self.last_pos; self.bump(); let mut hash_count = 0u; @@ -1387,9 +1412,9 @@ impl<'a> StringReader<'a> { self.bump(); } self.bump(); - return token::Literal(token::BinaryRaw(self.name_from_to(content_start_bpos, - content_end_bpos), - hash_count)); + return token::BinaryRaw(self.name_from_to(content_start_bpos, + content_end_bpos), + hash_count); } } @@ -1536,17 +1561,17 @@ mod test { #[test] fn character_a() { assert_eq!(setup(&mk_sh(), "'a'".to_string()).next_token().tok, - token::Literal(token::Char(token::intern("a")))); + token::Literal(token::Char(token::intern("a")), None)); } #[test] fn character_space() { assert_eq!(setup(&mk_sh(), "' '".to_string()).next_token().tok, - token::Literal(token::Char(token::intern(" ")))); + token::Literal(token::Char(token::intern(" ")), None)); } #[test] fn character_escaped() { 
assert_eq!(setup(&mk_sh(), "'\\n'".to_string()).next_token().tok, - token::Literal(token::Char(token::intern("\\n")))); + token::Literal(token::Char(token::intern("\\n")), None)); } #[test] fn lifetime_name() { @@ -1558,7 +1583,38 @@ mod test { assert_eq!(setup(&mk_sh(), "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token() .tok, - token::Literal(token::StrRaw(token::intern("\"#a\\b\x00c\""), 3))); + token::Literal(token::StrRaw(token::intern("\"#a\\b\x00c\""), 3), None)); + } + + #[test] fn literal_suffixes() { + macro_rules! test { + ($input: expr, $tok_type: ident, $tok_contents: expr) => {{ + assert_eq!(setup(&mk_sh(), format!("{}suffix", $input)).next_token().tok, + token::Literal(token::$tok_type(token::intern($tok_contents)), + Some(token::intern("suffix")))); + // with a whitespace separator: + assert_eq!(setup(&mk_sh(), format!("{} suffix", $input)).next_token().tok, + token::Literal(token::$tok_type(token::intern($tok_contents)), + None)); + }} + } + + test!("'a'", Char, "a"); + test!("b'a'", Byte, "a"); + test!("\"a\"", Str_, "a"); + test!("b\"a\"", Binary, "a"); + test!("1234", Integer, "1234"); + test!("0b101", Integer, "0b101"); + test!("0xABC", Integer, "0xABC"); + test!("1.0", Float, "1.0"); + test!("1.0e10", Float, "1.0e10"); + + assert_eq!(setup(&mk_sh(), "r###\"raw\"###suffix".to_string()).next_token().tok, + token::Literal(token::StrRaw(token::intern("raw"), 3), + Some(token::intern("suffix")))); + assert_eq!(setup(&mk_sh(), "br###\"raw\"###suffix".to_string()).next_token().tok, + token::Literal(token::BinaryRaw(token::intern("raw"), 3), + Some(token::intern("suffix")))); } #[test] fn line_doc_comments() { @@ -1574,7 +1630,7 @@ mod test { token::Comment => { }, _ => panic!("expected a comment!") } - assert_eq!(lexer.next_token().tok, token::Literal(token::Char(token::intern("a")))); + assert_eq!(lexer.next_token().tok, token::Literal(token::Char(token::intern("a")), None)); } } diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 4edcb182e5385..b9e1fe07e712a 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -646,6 +646,20 @@ impl<'a> Parser<'a> { } } + pub fn expect_no_suffix(&mut self, sp: Span, kind: &str, suffix: Option) { + match suffix { + None => {/* everything ok */} + Some(suf) => { + let text = suf.as_str(); + if text.is_empty() { + self.span_bug(sp, "found empty non-None literal suffix") + } + self.span_err(sp, &*format!("a {} with a suffix is illegal", kind)); + } + } + } + + /// Attempt to consume a `<`. If `<<` is seen, replace it with a single /// `<` and continue. If a `<` is not seen, return false. /// @@ -968,6 +982,9 @@ impl<'a> Parser<'a> { pub fn span_err(&mut self, sp: Span, m: &str) { self.sess.span_diagnostic.span_err(sp, m) } + pub fn span_bug(&mut self, sp: Span, m: &str) -> ! { + self.sess.span_diagnostic.span_bug(sp, m) + } pub fn abort_if_errors(&mut self) { self.sess.span_diagnostic.handler().abort_if_errors(); } @@ -1640,24 +1657,40 @@ impl<'a> Parser<'a> { /// Matches token_lit = LIT_INTEGER | ... 
pub fn lit_from_token(&mut self, tok: &token::Token) -> Lit_ { match *tok { - token::Literal(token::Byte(i)) => LitByte(parse::byte_lit(i.as_str()).val0()), - token::Literal(token::Char(i)) => LitChar(parse::char_lit(i.as_str()).val0()), - token::Literal(token::Integer(s)) => parse::integer_lit(s.as_str(), - &self.sess.span_diagnostic, - self.last_span), - token::Literal(token::Float(s)) => parse::float_lit(s.as_str()), - token::Literal(token::Str_(s)) => { - LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()), - ast::CookedStr) - } - token::Literal(token::StrRaw(s, n)) => { - LitStr(token::intern_and_get_ident(parse::raw_str_lit(s.as_str()).as_slice()), - ast::RawStr(n)) + token::Literal(lit, suf) => { + let (suffix_illegal, out) = match lit { + token::Byte(i) => (true, LitByte(parse::byte_lit(i.as_str()).val0())), + token::Char(i) => (true, LitChar(parse::char_lit(i.as_str()).val0())), + token::Integer(s) => (false, parse::integer_lit(s.as_str(), + &self.sess.span_diagnostic, + self.last_span)), + token::Float(s) => (false, parse::float_lit(s.as_str())), + token::Str_(s) => { + (true, + LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()), + ast::CookedStr)) + } + token::StrRaw(s, n) => { + (true, + LitStr( + token::intern_and_get_ident( + parse::raw_str_lit(s.as_str()).as_slice()), + ast::RawStr(n))) + } + token::Binary(i) => + (true, LitBinary(parse::binary_lit(i.as_str()))), + token::BinaryRaw(i, _) => + (true, + LitBinary(Rc::new(i.as_str().as_bytes().iter().map(|&x| x).collect()))), + }; + + if suffix_illegal { + let sp = self.last_span; + self.expect_no_suffix(sp, &*format!("{} literal", lit.short_name()), suf) + } + + out } - token::Literal(token::Binary(i)) => - LitBinary(parse::binary_lit(i.as_str())), - token::Literal(token::BinaryRaw(i, _)) => - LitBinary(Rc::new(i.as_str().as_bytes().iter().map(|&x| x).collect())), _ => { self.unexpected_last(tok); } } } @@ -2424,7 +2457,10 @@ impl<'a> Parser<'a> { } } } - token::Literal(token::Integer(n)) => { + token::Literal(token::Integer(n), suf) => { + let sp = self.span; + self.expect_no_suffix(sp, "tuple index", suf); + let index = n.as_str(); let dot = self.last_span.hi; hi = self.span.hi; @@ -2449,7 +2485,7 @@ impl<'a> Parser<'a> { } } } - token::Literal(token::Float(n)) => { + token::Literal(token::Float(n), _suf) => { self.bump(); let last_span = self.last_span; let fstr = n.as_str(); @@ -5085,12 +5121,17 @@ impl<'a> Parser<'a> { self.expect(&token::Semi); (path, the_ident) }, - token::Literal(token::Str_(..)) | token::Literal(token::StrRaw(..)) => { - let path = self.parse_str(); + token::Literal(token::Str_(..), suf) | token::Literal(token::StrRaw(..), suf) => { + let sp = self.span; + self.expect_no_suffix(sp, "extern crate name", suf); + // forgo the internal suffix check of `parse_str` to + // avoid repeats (this unwrap will always succeed due + // to the restriction of the `match`) + let (s, style, _) = self.parse_optional_str().unwrap(); self.expect_keyword(keywords::As); let the_ident = self.parse_ident(); self.expect(&token::Semi); - (Some(path), the_ident) + (Some((s, style)), the_ident) }, _ => { let span = self.span; @@ -5267,7 +5308,9 @@ impl<'a> Parser<'a> { /// the `extern` keyword, if one is found. 
fn parse_opt_abi(&mut self) -> Option<abi::Abi> {
 match self.token {
- token::Literal(token::Str_(s)) | token::Literal(token::StrRaw(s, _)) => {
+ token::Literal(token::Str_(s), suf) | token::Literal(token::StrRaw(s, _), suf) => {
+ let sp = self.span;
+ self.expect_no_suffix(sp, "ABI spec", suf);
 self.bump();
 let the_string = s.as_str();
 match abi::lookup(the_string) {
@@ -5902,21 +5945,27 @@ impl<'a> Parser<'a> {
 }

 pub fn parse_optional_str(&mut self)
- -> Option<(InternedString, ast::StrStyle)> {
- let (s, style) = match self.token {
- token::Literal(token::Str_(s)) => (self.id_to_interned_str(s.ident()), ast::CookedStr),
- token::Literal(token::StrRaw(s, n)) => {
- (self.id_to_interned_str(s.ident()), ast::RawStr(n))
+ -> Option<(InternedString, ast::StrStyle, Option<ast::Name>)> {
+ let ret = match self.token {
+ token::Literal(token::Str_(s), suf) => {
+ (self.id_to_interned_str(s.ident()), ast::CookedStr, suf)
+ }
+ token::Literal(token::StrRaw(s, n), suf) => {
+ (self.id_to_interned_str(s.ident()), ast::RawStr(n), suf)
 }
 _ => return None
 };
 self.bump();
- Some((s, style))
+ Some(ret)
 }

 pub fn parse_str(&mut self) -> (InternedString, StrStyle) {
 match self.parse_optional_str() {
- Some(s) => { s }
+ Some((s, style, suf)) => {
+ let sp = self.last_span;
+ self.expect_no_suffix(sp, "str literal", suf);
+ (s, style)
+ }
 _ => self.fatal("expected string literal")
 }
 }
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index bfa6ca798b233..4272b57a4dc51 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -72,6 +72,19 @@ pub enum Lit {
 BinaryRaw(ast::Name, uint), /* raw binary str delimited by n hash symbols */
 }

+impl Lit {
+ pub fn short_name(&self) -> &'static str {
+ match *self {
+ Byte(_) => "byte",
+ Char(_) => "char",
+ Integer(_) => "integer",
+ Float(_) => "float",
+ Str_(_) | StrRaw(..) => "str",
+ Binary(_) | BinaryRaw(..) => "binary str"
+ }
+ }
+}
+
 #[allow(non_camel_case_types)]
 #[deriving(Clone, Encodable, Decodable, PartialEq, Eq, Hash, Show)]
 pub enum Token {
@@ -111,7 +124,7 @@ pub enum Token {
 CloseDelim(DelimToken),

 /* Literals */
- Literal(Lit),
+ Literal(Lit, Option<ast::Name>),

 /* Name components */
 Ident(ast::Ident, IdentStyle),
@@ -151,7 +164,7 @@ impl Token {
 Ident(_, _) => true,
 Underscore => true,
 Tilde => true,
- Literal(_) => true,
+ Literal(_, _) => true,
 Pound => true,
 At => true,
 Not => true,
@@ -172,7 +185,7 @@ impl Token {
 /// Returns `true` if the token is any literal
 pub fn is_lit(&self) -> bool {
 match *self {
- Literal(_) => true,
+ Literal(_, _) => true,
 _ => false,
 }
 }
diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs
index 7997c1ba4efcf..642ffa3745d9f 100644
--- a/src/libsyntax/print/pprust.rs
+++ b/src/libsyntax/print/pprust.rs
@@ -236,18 +236,28 @@ pub fn token_to_string(tok: &Token) -> String {
 token::Question => "?".into_string(),

 /* Literals */
- token::Literal(token::Byte(b)) => format!("b'{}'", b.as_str()),
- token::Literal(token::Char(c)) => format!("'{}'", c.as_str()),
- token::Literal(token::Float(c)) => c.as_str().into_string(),
- token::Literal(token::Integer(c)) => c.as_str().into_string(),
- token::Literal(token::Str_(s)) => format!("\"{}\"", s.as_str()),
- token::Literal(token::StrRaw(s, n)) => format!("r{delim}\"{string}\"{delim}",
- delim="#".repeat(n),
- string=s.as_str()),
- token::Literal(token::Binary(v)) => format!("b\"{}\"", v.as_str()),
- token::Literal(token::BinaryRaw(s, n)) => format!("br{delim}\"{string}\"{delim}",
- delim="#".repeat(n),
- string=s.as_str()),
+ token::Literal(lit, suf) => {
+ let mut out = match lit {
+ token::Byte(b) => format!("b'{}'", b.as_str()),
+ token::Char(c) => format!("'{}'", c.as_str()),
+ token::Float(c) => c.as_str().into_string(),
+ token::Integer(c) => c.as_str().into_string(),
+ token::Str_(s) => format!("\"{}\"", s.as_str()),
+ token::StrRaw(s, n) => format!("r{delim}\"{string}\"{delim}",
+ delim="#".repeat(n),
+ string=s.as_str()),
+ token::Binary(v) => format!("b\"{}\"", v.as_str()),
+ token::BinaryRaw(s, n) => format!("br{delim}\"{string}\"{delim}",
+ delim="#".repeat(n),
+ string=s.as_str()),
+ };
+
+ if let Some(s) = suf {
+ out.push_str(s.as_str())
+ }
+
+ out
+ }

 /* Name components */
 token::Ident(s, _) => token::get_ident(s).get().into_string(),
diff --git a/src/test/compile-fail/bad-lit-suffixes.rs b/src/test/compile-fail/bad-lit-suffixes.rs
new file mode 100644
index 0000000000000..e48bb807488a0
--- /dev/null
+++ b/src/test/compile-fail/bad-lit-suffixes.rs
@@ -0,0 +1,36 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
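
// A toy model (simplified types, not rustc's own) of the check the test
// below exercises: after this patch a literal token carries an optional
// suffix, and each consumer decides whether a suffix is legal in that
// position, mirroring the parser's `expect_no_suffix` added above.
struct SketchLiteral { body: String, suffix: Option<String> }

fn sketch_expect_no_suffix(kind: &str, lit: &SketchLiteral) -> Result<(), String> {
    match lit.suffix {
        None => Ok(()),
        Some(ref suf) => Err(format!("{} with a suffix is illegal: `{}`", kind, suf)),
    }
}
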
+ + +extern crate + "foo"suffix //~ ERROR extern crate name with a suffix is illegal + as foo; + +extern + "C"suffix //~ ERROR ABI spec with a suffix is illegal + fn foo() {} + +extern + "C"suffix //~ ERROR ABI spec with a suffix is illegal +{} + +fn main() { + ""suffix; //~ ERROR str literal with a suffix is illegal + b""suffix; //~ ERROR binary str literal with a suffix is illegal + r#""#suffix; //~ ERROR str literal with a suffix is illegal + br#""#suffix; //~ ERROR binary str literal with a suffix is illegal + 'a'suffix; //~ ERROR char literal with a suffix is illegal + b'a'suffix; //~ ERROR byte literal with a suffix is illegal + + 1234suffix; + 0b101suffix; + 1.0suffix; + 1.0e10suffix; +} From 606a309d4aeb09ba88a0962c633a5b3fd4b300f6 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 19 Nov 2014 20:22:54 +1100 Subject: [PATCH 5/7] Switch numeric suffix parsing to use the new system. This moves errors and all handling of numeric suffixes into the parser rather than the lexer. --- src/libsyntax/parse/lexer/mod.rs | 74 +---------- src/libsyntax/parse/mod.rs | 145 ++++++++++++---------- src/libsyntax/parse/parser.rs | 25 +++- src/test/compile-fail/bad-lit-suffixes.rs | 13 +- 4 files changed, 108 insertions(+), 149 deletions(-) diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 55c4335941dbc..fbca4868255ff 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -672,16 +672,6 @@ impl<'a> StringReader<'a> { '0'...'9' | '_' | '.' => { num_digits = self.scan_digits(10) + 1; } - 'u' | 'i' => { - self.scan_int_suffix(); - return token::Integer(self.name_from(start_bpos)); - }, - 'f' => { - let last_pos = self.last_pos; - self.scan_float_suffix(); - self.check_float_base(start_bpos, last_pos, base); - return token::Float(self.name_from(start_bpos)); - } _ => { // just a 0 return token::Integer(self.name_from(start_bpos)); @@ -695,8 +685,6 @@ impl<'a> StringReader<'a> { if num_digits == 0 { self.err_span_(start_bpos, self.last_pos, "no valid digits found for number"); - // eat any suffix - self.scan_int_suffix(); return token::Integer(token::intern("0")); } @@ -711,28 +699,19 @@ impl<'a> StringReader<'a> { if self.curr.unwrap_or('\0').is_digit_radix(10) { self.scan_digits(10); self.scan_float_exponent(); - self.scan_float_suffix(); } let last_pos = self.last_pos; self.check_float_base(start_bpos, last_pos, base); return token::Float(self.name_from(start_bpos)); - } else if self.curr_is('f') { - // or it might be an integer literal suffixed as a float - self.scan_float_suffix(); - let last_pos = self.last_pos; - self.check_float_base(start_bpos, last_pos, base); - return token::Float(self.name_from(start_bpos)); } else { // it might be a float if it has an exponent if self.curr_is('e') || self.curr_is('E') { self.scan_float_exponent(); - self.scan_float_suffix(); let last_pos = self.last_pos; self.check_float_base(start_bpos, last_pos, base); return token::Float(self.name_from(start_bpos)); } // but we certainly have an integer! - self.scan_int_suffix(); return token::Integer(self.name_from(start_bpos)); } } @@ -869,55 +848,6 @@ impl<'a> StringReader<'a> { true } - /// Scan over an int literal suffix. 
- fn scan_int_suffix(&mut self) { - match self.curr { - Some('i') | Some('u') => { - self.bump(); - - if self.curr_is('8') { - self.bump(); - } else if self.curr_is('1') { - if !self.nextch_is('6') { - self.err_span_(self.last_pos, self.pos, - "illegal int suffix"); - } else { - self.bump(); self.bump(); - } - } else if self.curr_is('3') { - if !self.nextch_is('2') { - self.err_span_(self.last_pos, self.pos, - "illegal int suffix"); - } else { - self.bump(); self.bump(); - } - } else if self.curr_is('6') { - if !self.nextch_is('4') { - self.err_span_(self.last_pos, self.pos, - "illegal int suffix"); - } else { - self.bump(); self.bump(); - } - } - }, - _ => { } - } - } - - /// Scan over a float literal suffix - fn scan_float_suffix(&mut self) { - if self.curr_is('f') { - if (self.nextch_is('3') && self.nextnextch_is('2')) - || (self.nextch_is('6') && self.nextnextch_is('4')) { - self.bump(); - self.bump(); - self.bump(); - } else { - self.err_span_(self.last_pos, self.pos, "illegal float suffix"); - } - } - } - /// Scan over a float exponent. fn scan_float_exponent(&mut self) { if self.curr_is('e') || self.curr_is('E') { @@ -988,6 +918,7 @@ impl<'a> StringReader<'a> { if is_dec_digit(c) { let num = self.scan_number(c.unwrap()); let suffix = self.scan_optional_raw_name(); + debug!("next_token_inner: scanned number {}, {}", num, suffix); return token::Literal(num, suffix) } @@ -1609,6 +1540,9 @@ mod test { test!("1.0", Float, "1.0"); test!("1.0e10", Float, "1.0e10"); + assert_eq!(setup(&mk_sh(), "2u".to_string()).next_token().tok, + token::Literal(token::Integer(token::intern("2")), + Some(token::intern("u")))); assert_eq!(setup(&mk_sh(), "r###\"raw\"###suffix".to_string()).next_token().tok, token::Literal(token::StrRaw(token::intern("raw"), 3), Some(token::intern("suffix")))); diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs index 2810db4eaddd8..d111108269dfe 100644 --- a/src/libsyntax/parse/mod.rs +++ b/src/libsyntax/parse/mod.rs @@ -511,28 +511,41 @@ pub fn raw_str_lit(lit: &str) -> String { res } -pub fn float_lit(s: &str) -> ast::Lit_ { - debug!("float_lit: {}", s); - // FIXME #2252: bounds checking float literals is defered until trans - let s2 = s.chars().filter(|&c| c != '_').collect::(); - let s = s2.as_slice(); - - let mut ty = None; - - if s.ends_with("f32") { - ty = Some(ast::TyF32); - } else if s.ends_with("f64") { - ty = Some(ast::TyF64); - } +// check if `s` looks like i32 or u1234 etc. +fn looks_like_width_suffix(first_chars: &[char], s: &str) -> bool { + s.len() > 1 && + first_chars.contains(&s.char_at(0)) && + s.slice_from(1).chars().all(|c| '0' <= c && c <= '9') +} +fn filtered_float_lit(data: token::InternedString, suffix: Option<&str>, + sd: &SpanHandler, sp: Span) -> ast::Lit_ { + debug!("filtered_float_lit: {}, {}", data, suffix); + match suffix { + Some("f32") => ast::LitFloat(data, ast::TyF32), + Some("f64") => ast::LitFloat(data, ast::TyF64), + Some(suf) => { + if suf.len() >= 2 && looks_like_width_suffix(&['f'], suf) { + // if it looks like a width, lets try to be helpful. 
+                sd.span_err(sp, &*format!("illegal width `{}` for float literal, \
+                                          valid widths are 32 and 64", suf.slice_from(1)));
+            } else {
+                sd.span_err(sp, &*format!("illegal suffix `{}` for float literal, \
+                                          valid suffixes are `f32` and `f64`", suf));
+            }

-    match ty {
-        Some(t) => {
-            ast::LitFloat(token::intern_and_get_ident(s.slice_to(s.len() - t.suffix_len())), t)
-        },
-        None => ast::LitFloatUnsuffixed(token::intern_and_get_ident(s))
+
+            ast::LitFloatUnsuffixed(data)
+        }
+        None => ast::LitFloatUnsuffixed(data)
     }
 }
+pub fn float_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
+    debug!("float_lit: {}, {}", s, suffix);
+    // FIXME #2252: bounds checking float literals is deferred until trans
+    let s = s.chars().filter(|&c| c != '_').collect::<String>();
+    let data = token::intern_and_get_ident(&*s);
+    filtered_float_lit(data, suffix, sd, sp)
+}

 /// Parse a string representing a byte literal into its final form. Similar to `char_lit`
 pub fn byte_lit(lit: &str) -> (u8, uint) {
@@ -626,24 +639,19 @@ pub fn binary_lit(lit: &str) -> Rc<Vec<u8>> {
     Rc::new(res)
 }

-pub fn integer_lit(s: &str, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
+pub fn integer_lit(s: &str, suffix: Option<&str>, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
     // s can only be ascii, byte indexing is fine
     let s2 = s.chars().filter(|&c| c != '_').collect::<String>();
     let mut s = s2.as_slice();

-    debug!("parse_integer_lit: {}", s);
-
-    if s.len() == 1 {
-        let n = (s.char_at(0)).to_digit(10).unwrap();
-        return ast::LitInt(n as u64, ast::UnsuffixedIntLit(ast::Sign::new(n)));
-    }
+    debug!("integer_lit: {}, {}", s, suffix);

     let mut base = 10;
     let orig = s;
     let mut ty = ast::UnsuffixedIntLit(ast::Plus);

-    if s.char_at(0) == '0' {
+    if s.char_at(0) == '0' && s.len() > 1 {
         match s.char_at(1) {
             'x' => base = 16,
             'o' => base = 8,
@@ -652,57 +660,56 @@ pub fn integer_lit(s: &str, sd: &SpanHandler, sp: Span) -> ast::Lit_ {
         }
     }

+    // 1f64 and 2f32 etc. are valid float literals.
+    match suffix {
+        Some(suf) if looks_like_width_suffix(&['f'], suf) => {
+            match base {
+                16u => sd.span_err(sp, "hexadecimal float literal is not supported"),
+                8u => sd.span_err(sp, "octal float literal is not supported"),
+                2u => sd.span_err(sp, "binary float literal is not supported"),
+                _ => ()
+            }
+            let ident = token::intern_and_get_ident(&*s);
+            return filtered_float_lit(ident, suffix, sd, sp)
+        }
+        _ => {}
+    }
+
     if base != 10 {
         s = s.slice_from(2);
     }

-    let last = s.len() - 1;
-    match s.char_at(last) {
-        'i' => ty = ast::SignedIntLit(ast::TyI, ast::Plus),
-        'u' => ty = ast::UnsignedIntLit(ast::TyU),
-        '8' => {
-            if s.len() > 2 {
-                match s.char_at(last - 1) {
-                    'i' => ty = ast::SignedIntLit(ast::TyI8, ast::Plus),
-                    'u' => ty = ast::UnsignedIntLit(ast::TyU8),
-                    _ => { }
-                }
-            }
-        },
-        '6' => {
-            if s.len() > 3 && s.char_at(last - 1) == '1' {
-                match s.char_at(last - 2) {
-                    'i' => ty = ast::SignedIntLit(ast::TyI16, ast::Plus),
-                    'u' => ty = ast::UnsignedIntLit(ast::TyU16),
-                    _ => { }
-                }
-            }
-        },
-        '2' => {
-            if s.len() > 3 && s.char_at(last - 1) == '3' {
-                match s.char_at(last - 2) {
-                    'i' => ty = ast::SignedIntLit(ast::TyI32, ast::Plus),
-                    'u' => ty = ast::UnsignedIntLit(ast::TyU32),
-                    _ => { }
-                }
-            }
-        },
-        '4' => {
-            if s.len() > 3 && s.char_at(last - 1) == '6' {
-                match s.char_at(last - 2) {
-                    'i' => ty = ast::SignedIntLit(ast::TyI64, ast::Plus),
-                    'u' => ty = ast::UnsignedIntLit(ast::TyU64),
-                    _ => { }
-                }
-            }
-        },
-        _ => { }
+    if let Some(suf) = suffix {
+        if suf.is_empty() { sd.span_bug(sp, "found empty literal suffix in Some") }
+        ty = match suf {
+            "i" => ast::SignedIntLit(ast::TyI, ast::Plus),
+            "i8" => ast::SignedIntLit(ast::TyI8, ast::Plus),
+            "i16" => ast::SignedIntLit(ast::TyI16, ast::Plus),
+            "i32" => ast::SignedIntLit(ast::TyI32, ast::Plus),
+            "i64" => ast::SignedIntLit(ast::TyI64, ast::Plus),
+            "u" => ast::UnsignedIntLit(ast::TyU),
+            "u8" => ast::UnsignedIntLit(ast::TyU8),
+            "u16" => ast::UnsignedIntLit(ast::TyU16),
+            "u32" => ast::UnsignedIntLit(ast::TyU32),
+            "u64" => ast::UnsignedIntLit(ast::TyU64),
+            _ => {
+                // i and u look like widths, so let's
+                // give an error message along those lines
+                if looks_like_width_suffix(&['i', 'u'], suf) {
+                    sd.span_err(sp, &*format!("illegal width `{}` for integer literal; \
+                                              valid widths are 8, 16, 32 and 64",
+                                              suf.slice_from(1)));
+                } else {
+                    sd.span_err(sp, &*format!("illegal suffix `{}` for numeric literal", suf));
+                }
+
+                ty
+            }
+        }
     }

-    debug!("The suffix is {}, base {}, the new string is {}, the original \
-           string was {}", ty, base, s, orig);
-
-    s = s.slice_to(s.len() - ty.suffix_len());
+    debug!("integer_lit: the type is {}, base {}, the new string is {}, the original \
+           string was {}, the original suffix was {}", ty, base, s, orig, suffix);

     let res: u64 = match ::std::num::from_str_radix(s, base) {
         Some(r) => r,
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index b9e1fe07e712a..85364b8f65ffa 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -652,9 +652,9 @@ impl<'a> Parser<'a> {
             Some(suf) => {
                 let text = suf.as_str();
                 if text.is_empty() {
-                    self.span_bug(sp, "found empty non-None literal suffix")
+                    self.span_bug(sp, "found empty literal suffix in Some")
                 }
-                self.span_err(sp, &*format!("a {} with a suffix is illegal", kind));
+                self.span_err(sp, &*format!("{} with a suffix is illegal", kind));
             }
         }
     }
@@ -1661,10 +1661,23 @@ impl<'a> Parser<'a> {
         let (suffix_illegal, out) = match lit {
             token::Byte(i) => (true, LitByte(parse::byte_lit(i.as_str()).val0())),
             token::Char(i) => (true, LitChar(parse::char_lit(i.as_str()).val0())),
-            token::Integer(s) => (false, parse::integer_lit(s.as_str(),
-                                                            &self.sess.span_diagnostic,
-                                                            self.last_span)),
-            token::Float(s) => (false, parse::float_lit(s.as_str())),
+
+            // there are some valid suffixes for integer and
+            // float literals, so all the handling is done
+            // internally.
+            token::Integer(s) => {
+                (false, parse::integer_lit(s.as_str(),
+                                           suf.as_ref().map(|s| s.as_str()),
+                                           &self.sess.span_diagnostic,
+                                           self.last_span))
+            }
+            token::Float(s) => {
+                (false, parse::float_lit(s.as_str(),
+                                         suf.as_ref().map(|s| s.as_str()),
+                                         &self.sess.span_diagnostic,
+                                         self.last_span))
+            }
+
             token::Str_(s) => {
                 (true,
                  LitStr(token::intern_and_get_ident(parse::str_lit(s.as_str()).as_slice()),
diff --git a/src/test/compile-fail/bad-lit-suffixes.rs b/src/test/compile-fail/bad-lit-suffixes.rs
index e48bb807488a0..e142365a8ca07 100644
--- a/src/test/compile-fail/bad-lit-suffixes.rs
+++ b/src/test/compile-fail/bad-lit-suffixes.rs
@@ -29,8 +29,13 @@ fn main() {
     'a'suffix; //~ ERROR char literal with a suffix is illegal
     b'a'suffix; //~ ERROR byte literal with a suffix is illegal

-    1234suffix;
-    0b101suffix;
-    1.0suffix;
-    1.0e10suffix;
+    1234u1024; //~ ERROR illegal width `1024` for integer literal
+    1234i1024; //~ ERROR illegal width `1024` for integer literal
+    1234f1024; //~ ERROR illegal width `1024` for float literal
+    1234.5f1024; //~ ERROR illegal width `1024` for float literal
+
+    1234suffix; //~ ERROR illegal suffix `suffix` for numeric literal
+    0b101suffix; //~ ERROR illegal suffix `suffix` for numeric literal
+    1.0suffix; //~ ERROR illegal suffix `suffix` for numeric literal
+    1.0e10suffix; //~ ERROR illegal suffix `suffix` for numeric literal
 }

From 7586abf01b5c6fcbb60926c7cab7d3e5d133fa9a Mon Sep 17 00:00:00 2001
From: Huon Wilson
Date: Wed, 19 Nov 2014 20:25:48 +1100
Subject: [PATCH 6/7] Adjust Antlr4 lexer to include suffixes.

This makes the formal lexical grammar (more closely) reflect the one
implemented by the compiler.
---
 src/grammar/RustLexer.g4 | 44 ++++++++++++++--------------------------
 1 file changed, 15 insertions(+), 29 deletions(-)

diff --git a/src/grammar/RustLexer.g4 b/src/grammar/RustLexer.g4
index e8165dabce5cd..0ff9af7aca133 100644
--- a/src/grammar/RustLexer.g4
+++ b/src/grammar/RustLexer.g4
@@ -92,49 +92,35 @@ fragment CHAR_ESCAPE
   | 'U' HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT HEXIT
   ;

-LIT_CHAR
-  : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] ) '\''
+fragment SUFFIX
+  : IDENT
   ;

-LIT_BYTE
-  : 'b\'' ( '\\' ( [xX] HEXIT HEXIT | [nrt\\'"0] ) | ~[\\'\n\t\r] ) '\''
+LIT_CHAR
+  : '\'' ( '\\' CHAR_ESCAPE | ~[\\'\n\t\r] ) '\'' SUFFIX?
   ;

-fragment INT_SUFFIX
-  : 'i'
-  | 'i8'
-  | 'i16'
-  | 'i32'
-  | 'i64'
-  | 'u'
-  | 'u8'
-  | 'u16'
-  | 'u32'
-  | 'u64'
+LIT_BYTE
+  : 'b\'' ( '\\' ( [xX] HEXIT HEXIT | [nrt\\'"0] ) | ~[\\'\n\t\r] ) '\'' SUFFIX?
   ;

 LIT_INTEGER
-  : [0-9][0-9_]* INT_SUFFIX?
-  | '0b' [01][01_]* INT_SUFFIX?
-  | '0o' [0-7][0-7_]* INT_SUFFIX?
-  | '0x' [0-9a-fA-F][0-9a-fA-F_]* INT_SUFFIX?
-  ;
-
-fragment FLOAT_SUFFIX
-  : 'f32'
-  | 'f64'
+  : [0-9][0-9_]* SUFFIX?
+  | '0b' [01][01_]* SUFFIX?
+  | '0o' [0-7][0-7_]* SUFFIX?
+  | '0x' [0-9a-fA-F][0-9a-fA-F_]* SUFFIX?
   ;

 LIT_FLOAT
-  : [0-9][0-9_]* ('.' | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? FLOAT_SUFFIX?)
+  : [0-9][0-9_]* ('.' | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX?)
   ;

 LIT_STR
-  : '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? '"'
+  : '"' ('\\\n' | '\\\r\n' | '\\' CHAR_ESCAPE | .)*? '"' SUFFIX?
   ;

-LIT_BINARY : 'b' LIT_STR ;
-LIT_BINARY_RAW : 'rb' LIT_STR_RAW ;
+LIT_BINARY : 'b' LIT_STR SUFFIX? ;
+LIT_BINARY_RAW : 'rb' LIT_STR_RAW SUFFIX? ;

 /* this is a bit messy */
@@ -148,7 +134,7 @@ fragment LIT_STR_RAW_INNER2
   ;

 LIT_STR_RAW
-  : 'r' LIT_STR_RAW_INNER
+  : 'r' LIT_STR_RAW_INNER SUFFIX?
   ;

 IDENT : XID_start XID_continue* ;

From a11078f8c3a69f0d4ea20ca10aaf96622770615b Mon Sep 17 00:00:00 2001
From: Huon Wilson
Date: Wed, 19 Nov 2014 21:12:40 +1100
Subject: [PATCH 7/7] Update documentation for literal suffixes.

This changes the stated grammar of literals to move all suffixes into
the generic literal production.
---
 src/doc/reference.md | 43 ++++++++++++++++++++-----------------------
 1 file changed, 20 insertions(+), 23 deletions(-)

diff --git a/src/doc/reference.md b/src/doc/reference.md
index 62e0f5e4f1f48..0b4a745f98716 100644
--- a/src/doc/reference.md
+++ b/src/doc/reference.md
@@ -216,9 +216,15 @@ rather than referring to it by name or some other evaluation rule.
 A literal is a form of constant expression, so is evaluated (primarily)
 at compile time.

 ```{.ebnf .gram}
-literal : string_lit | char_lit | byte_string_lit | byte_lit | num_lit ;
+lit_suffix : ident;
+literal : [ string_lit | char_lit | byte_string_lit | byte_lit | num_lit ] lit_suffix ?;
 ```

+The optional suffix is only used for certain numeric literals, but is
+reserved for future extension; that is, the above gives the lexical
+grammar, but a Rust parser will reject everything except the 12 special
+cases mentioned in [Number literals](#number-literals) below.
+
 #### Character and string literals

 ```{.ebnf .gram}
@@ -371,27 +377,20 @@ b"\\x52"; br"\x52"; // \x52
 #### Number literals

 ```{.ebnf .gram}
-num_lit : nonzero_dec [ dec_digit | '_' ] * num_suffix ?
-        | '0' [       [ dec_digit | '_' ] * num_suffix ?
-              | 'b'   [ '1' | '0' | '_' ] + int_suffix ?
-              | 'o'   [ oct_digit | '_' ] + int_suffix ?
-              | 'x'   [ hex_digit | '_' ] + int_suffix ? ] ;
-
-num_suffix : int_suffix | float_suffix ;
+num_lit : nonzero_dec [ dec_digit | '_' ] * float_suffix ?
+        | '0' [       [ dec_digit | '_' ] * float_suffix ?
+              | 'b'   [ '1' | '0' | '_' ] +
+              | 'o'   [ oct_digit | '_' ] +
+              | 'x'   [ hex_digit | '_' ] + ] ;

-int_suffix : 'u' int_suffix_size ?
-           | 'i' int_suffix_size ? ;
-int_suffix_size : [ '8' | "16" | "32" | "64" ] ;
+float_suffix : [ exponent | '.' dec_lit exponent ? ] ? ;

-float_suffix : [ exponent | '.' dec_lit exponent ? ] ? float_suffix_ty ? ;
-float_suffix_ty : 'f' [ "32" | "64" ] ;
 exponent : ['E' | 'e'] ['-' | '+' ] ? dec_lit ;
 dec_lit : [ dec_digit | '_' ] + ;
 ```

 A _number literal_ is either an _integer literal_ or a _floating-point
-literal_. The grammar for recognizing the two kinds of literals is mixed, as
-they are differentiated by suffixes.
+literal_. The grammar for recognizing the two kinds of literals is mixed.

 ##### Integer literals

@@ -406,9 +405,9 @@ An _integer literal_ has one of four forms:
 * A _binary literal_ starts with the character sequence `U+0030` `U+0062`
   (`0b`) and continues as any mixture of binary digits and underscores.

-An integer literal may be followed (immediately, without any spaces) by an
-_integer suffix_, which changes the type of the literal. There are two kinds of
-integer literal suffix:
+Like any literal, an integer literal may be followed (immediately,
+without any spaces) by an _integer suffix_, which forcibly sets the
+type of the literal. There are 10 valid values for an integer suffix:

 * The `i` and `u` suffixes give the literal type `int` or `uint`,
   respectively.
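
To make the ten accepted suffixes concrete, a hedged illustration (not part of
the patch; note that bare `i` and `u` denoted the machine-sized `int` and
`uint` types in the Rust of this era):

```rust
fn main() {
    // Illustrative only; these fixed-width suffixes are valid in any era.
    let a = 255u8;     // `u8`  -> type u8
    let b = 1_000i32;  // underscores are stripped before the suffix is read
    let c = 0xffu64;   // base prefixes combine freely with any suffix

    // Rejected by the parser, not the lexer, under the new scheme:
    // 1234u1024;  //~ ERROR illegal width `1024` for integer literal
    // 1234suffix; //~ ERROR illegal suffix `suffix` for numeric literal
    let _ = (a, b, c);
}
```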
@@ -443,11 +442,9 @@ A _floating-point literal_ has one of two forms:
 * A single _decimal literal_ followed by an _exponent_.

 By default, a floating-point literal has a generic type, and, like integer
-literals, the type must be uniquely determined from the context. A
-floating-point literal may be followed (immediately, without any spaces) by a
-_floating-point suffix_, which changes the type of the literal. There are two
-floating-point suffixes: `f32`, and `f64` (the 32-bit and 64-bit floating point
-types).
+literals, the type must be uniquely determined from the context. There are two valid
+_floating-point suffixes_, `f32` and `f64` (the 32-bit and 64-bit floating point
+types), which explicitly determine the type of the literal.

 Examples of floating-point literals of various forms:
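
A sketch of such literals, consistent with the grammar above (the reference's
own example list falls outside this hunk):

```rust
fn main() {
    // Illustrative forms; any identifier other than f32/f64 after a
    // float literal is rejected by the parser.
    let a = 123.0f64;  // suffixed: type f64
    let b = 0.1f32;    // suffixed: type f32
    let c = 12e+99;    // decimal literal plus exponent, type from context
    let d: f64 = 2.;   // trailing-dot form
    let _ = (a, b, c, d);
}
```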