From ccffea5b6b3372cefd4e15bc738a2669bc6f69a0 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Sat, 29 Aug 2020 12:10:16 +0200
Subject: [PATCH 1/2] Move lexer unit tests to rustc_lexer

StringReader is an internal abstraction which at the moment changes a
lot, so these unit tests cause quite a bit of friction. Moving them to
rustc_lexer and a more integrated-testing style should make them much
less annoying, hopefully without decreasing their usefulness much.

Note that the doublecolon tests are removed (it's unclear what those
are testing). \r\n tests are removed as well, as we normalize line
endings even before lexing.
---
 Cargo.lock                                |  15 +-
 compiler/rustc_expand/src/lib.rs          |   5 -
 .../rustc_expand/src/parse/lexer/tests.rs | 252 ------------------
 compiler/rustc_lexer/src/tests.rs         | 160 +++++++++--
 4 files changed, 154 insertions(+), 278 deletions(-)
 delete mode 100644 compiler/rustc_expand/src/parse/lexer/tests.rs

diff --git a/Cargo.lock b/Cargo.lock
index ffc1f0dec1d9a..d549396943392 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -342,7 +342,7 @@ dependencies = [
 name = "cargo-miri"
 version = "0.1.0"
 dependencies = [
- "cargo_metadata 0.11.1",
+ "cargo_metadata 0.9.1",
  "directories",
  "rustc-workspace-hack",
  "rustc_version",
@@ -391,6 +391,18 @@ dependencies = [
  "serde_json",
 ]
 
+[[package]]
+name = "cargo_metadata"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46e3374c604fb39d1a2f35ed5e4a4e30e60d01fab49446e08f1b3e9a90aef202"
+dependencies = [
+ "semver 0.9.0",
+ "serde",
+ "serde_derive",
+ "serde_json",
+]
+
 [[package]]
 name = "cargo_metadata"
 version = "0.11.1"
@@ -1979,6 +1991,7 @@ dependencies = [
 name = "miri"
 version = "0.1.0"
 dependencies = [
+ "byteorder",
  "colored",
  "compiletest_rs",
  "env_logger 0.7.1",
diff --git a/compiler/rustc_expand/src/lib.rs b/compiler/rustc_expand/src/lib.rs
index 7f631cb71afa8..5436b1ef737f5 100644
--- a/compiler/rustc_expand/src/lib.rs
+++ b/compiler/rustc_expand/src/lib.rs
@@ -39,11 +39,6 @@ mod tests;
 mod parse {
     #[cfg(test)]
     mod tests;
-    #[cfg(test)]
-    mod lexer {
-        #[cfg(test)]
-        mod tests;
-    }
 }
 #[cfg(test)]
 mod tokenstream {
diff --git a/compiler/rustc_expand/src/parse/lexer/tests.rs b/compiler/rustc_expand/src/parse/lexer/tests.rs
deleted file mode 100644
index 871844442839c..0000000000000
--- a/compiler/rustc_expand/src/parse/lexer/tests.rs
+++ /dev/null
@@ -1,252 +0,0 @@
-use rustc_ast::ast::AttrStyle;
-use rustc_ast::token::{self, CommentKind, Token, TokenKind};
-use rustc_data_structures::sync::Lrc;
-use rustc_errors::{emitter::EmitterWriter, Handler};
-use rustc_parse::lexer::StringReader;
-use rustc_session::parse::ParseSess;
-use rustc_span::source_map::{FilePathMapping, SourceMap};
-use rustc_span::symbol::Symbol;
-use rustc_span::with_default_session_globals;
-use rustc_span::{BytePos, Span};
-
-use std::io;
-use std::path::PathBuf;
-
-fn mk_sess(sm: Lrc<SourceMap>) -> ParseSess {
-    let emitter = EmitterWriter::new(
-        Box::new(io::sink()),
-        Some(sm.clone()),
-        false,
-        false,
-        false,
-        None,
-        false,
-    );
-    ParseSess::with_span_handler(Handler::with_emitter(true, None, Box::new(emitter)), sm)
-}
-
-// Creates a string reader for the given string.
-fn setup<'a>(sm: &SourceMap, sess: &'a ParseSess, teststr: String) -> StringReader<'a> {
-    let sf = sm.new_source_file(PathBuf::from(teststr.clone()).into(), teststr);
-    StringReader::new(sess, sf, None)
-}
-
-#[test]
-fn t1() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        let mut string_reader = setup(
-            &sm,
-            &sh,
-            "/* my source file */ fn main() { println!(\"zebra\"); }\n".to_string(),
-        );
-        assert_eq!(string_reader.next_token(), token::Comment);
-        assert_eq!(string_reader.next_token(), token::Whitespace);
-        let tok1 = string_reader.next_token();
-        let tok2 = Token::new(mk_ident("fn"), Span::with_root_ctxt(BytePos(21), BytePos(23)));
-        assert_eq!(tok1.kind, tok2.kind);
-        assert_eq!(tok1.span, tok2.span);
-        assert_eq!(string_reader.next_token(), token::Whitespace);
-        // Read another token.
-        let tok3 = string_reader.next_token();
-        assert_eq!(string_reader.pos(), BytePos(28));
-        let tok4 = Token::new(mk_ident("main"), Span::with_root_ctxt(BytePos(24), BytePos(28)));
-        assert_eq!(tok3.kind, tok4.kind);
-        assert_eq!(tok3.span, tok4.span);
-
-        assert_eq!(string_reader.next_token(), token::OpenDelim(token::Paren));
-        assert_eq!(string_reader.pos(), BytePos(29))
-    })
-}
-
-// Checks that the given reader produces the desired stream
-// of tokens (stop checking after exhausting `expected`).
-fn check_tokenization(mut string_reader: StringReader<'_>, expected: Vec<TokenKind>) {
-    for expected_tok in &expected {
-        assert_eq!(&string_reader.next_token(), expected_tok);
-    }
-}
-
-// Makes the identifier by looking up the string in the interner.
-fn mk_ident(id: &str) -> TokenKind {
-    token::Ident(Symbol::intern(id), false)
-}
-
-fn mk_lit(kind: token::LitKind, symbol: &str, suffix: Option<&str>) -> TokenKind {
-    TokenKind::lit(kind, Symbol::intern(symbol), suffix.map(Symbol::intern))
-}
-
-#[test]
-fn doublecolon_parsing() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        check_tokenization(
-            setup(&sm, &sh, "a b".to_string()),
-            vec![mk_ident("a"), token::Whitespace, mk_ident("b")],
-        );
-    })
-}
-
-#[test]
-fn doublecolon_parsing_2() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        check_tokenization(
-            setup(&sm, &sh, "a::b".to_string()),
-            vec![mk_ident("a"), token::Colon, token::Colon, mk_ident("b")],
-        );
-    })
-}
-
-#[test]
-fn doublecolon_parsing_3() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        check_tokenization(
-            setup(&sm, &sh, "a ::b".to_string()),
-            vec![mk_ident("a"), token::Whitespace, token::Colon, token::Colon, mk_ident("b")],
-        );
-    })
-}
-
-#[test]
-fn doublecolon_parsing_4() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        check_tokenization(
-            setup(&sm, &sh, "a:: b".to_string()),
-            vec![mk_ident("a"), token::Colon, token::Colon, token::Whitespace, mk_ident("b")],
-        );
-    })
-}
-
-#[test]
-fn character_a() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        assert_eq!(setup(&sm, &sh, "'a'".to_string()).next_token(), mk_lit(token::Char, "a", None),);
-    })
-}
-
-#[test]
-fn character_space() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        assert_eq!(setup(&sm, &sh, "' '".to_string()).next_token(), mk_lit(token::Char, " ", None),);
-    })
-}
-
-#[test]
-fn character_escaped() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        assert_eq!(
-            setup(&sm, &sh, "'\\n'".to_string()).next_token(),
-            mk_lit(token::Char, "\\n", None),
-        );
-    })
-}
-
-#[test]
-fn lifetime_name() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        assert_eq!(
-            setup(&sm, &sh, "'abc".to_string()).next_token(),
-            token::Lifetime(Symbol::intern("'abc")),
-        );
-    })
-}
-
-#[test]
-fn raw_string() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        assert_eq!(
-            setup(&sm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string()).next_token(),
-            mk_lit(token::StrRaw(3), "\"#a\\b\x00c\"", None),
-        );
-    })
-}
-
-#[test]
-fn literal_suffixes() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        macro_rules! test {
-            ($input: expr, $tok_type: ident, $tok_contents: expr) => {{
-                assert_eq!(
-                    setup(&sm, &sh, format!("{}suffix", $input)).next_token(),
-                    mk_lit(token::$tok_type, $tok_contents, Some("suffix")),
-                );
-                // with a whitespace separator
-                assert_eq!(
-                    setup(&sm, &sh, format!("{} suffix", $input)).next_token(),
-                    mk_lit(token::$tok_type, $tok_contents, None),
-                );
-            }};
-        }
-
-        test!("'a'", Char, "a");
-        test!("b'a'", Byte, "a");
-        test!("\"a\"", Str, "a");
-        test!("b\"a\"", ByteStr, "a");
-        test!("1234", Integer, "1234");
-        test!("0b101", Integer, "0b101");
-        test!("0xABC", Integer, "0xABC");
-        test!("1.0", Float, "1.0");
-        test!("1.0e10", Float, "1.0e10");
-
-        assert_eq!(
-            setup(&sm, &sh, "2us".to_string()).next_token(),
-            mk_lit(token::Integer, "2", Some("us")),
-        );
-        assert_eq!(
-            setup(&sm, &sh, "r###\"raw\"###suffix".to_string()).next_token(),
-            mk_lit(token::StrRaw(3), "raw", Some("suffix")),
-        );
-        assert_eq!(
-            setup(&sm, &sh, "br###\"raw\"###suffix".to_string()).next_token(),
-            mk_lit(token::ByteStrRaw(3), "raw", Some("suffix")),
-        );
-    })
-}
-
-#[test]
-fn nested_block_comments() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        let mut lexer = setup(&sm, &sh, "/* /* */ */'a'".to_string());
-        assert_eq!(lexer.next_token(), token::Comment);
-        assert_eq!(lexer.next_token(), mk_lit(token::Char, "a", None));
-    })
-}
-
-#[test]
-fn crlf_comments() {
-    with_default_session_globals(|| {
-        let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
-        let sh = mk_sess(sm.clone());
-        let mut lexer = setup(&sm, &sh, "// test\r\n/// test\r\n".to_string());
-        let comment = lexer.next_token();
-        assert_eq!(comment.kind, token::Comment);
-        assert_eq!((comment.span.lo(), comment.span.hi()), (BytePos(0), BytePos(7)));
-        assert_eq!(lexer.next_token(), token::Whitespace);
-        assert_eq!(
-            lexer.next_token(),
-            token::DocComment(CommentKind::Line, AttrStyle::Outer, Symbol::intern(" test"))
-        );
-    })
-}
diff --git a/compiler/rustc_lexer/src/tests.rs b/compiler/rustc_lexer/src/tests.rs
index a1ea5ceb1f612..94017b7b286e2 100644
--- a/compiler/rustc_lexer/src/tests.rs
+++ b/compiler/rustc_lexer/src/tests.rs
@@ -128,6 +128,34 @@ fn check_lexing(src: &str, expect: Expect) {
     expect.assert_eq(&actual)
 }
 
+#[test]
+fn smoke_test() {
+    check_lexing(
+        "/* my source file */ fn main() { println!(\"zebra\"); }\n",
+        expect![[r#"
+            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 20 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 2 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 4 }
+            Token { kind: OpenParen, len: 1 }
+            Token { kind: CloseParen, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: OpenBrace, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Ident, len: 7 }
+            Token { kind: Bang, len: 1 }
+            Token { kind: OpenParen, len: 1 }
+            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 7 }, len: 7 }
+            Token { kind: CloseParen, len: 1 }
+            Token { kind: Semi, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: CloseBrace, len: 1 }
+            Token { kind: Whitespace, len: 1 }
+        "#]],
+    )
+}
+
 #[test]
 fn comment_flavors() {
     check_lexing(
@@ -143,25 +171,117 @@ fn comment_flavors() {
 /*! inner doc block */
 ",
         expect![[r#"
-                Token { kind: Whitespace, len: 1 }
-                Token { kind: LineComment { doc_style: None }, len: 7 }
-                Token { kind: Whitespace, len: 1 }
-                Token { kind: LineComment { doc_style: None }, len: 17 }
-                Token { kind: Whitespace, len: 1 }
-                Token { kind: LineComment { doc_style: Some(Outer) }, len: 18 }
-                Token { kind: Whitespace, len: 1 }
-                Token { kind: LineComment { doc_style: Some(Inner) }, len: 18 }
-                Token { kind: Whitespace, len: 1 }
-                Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
-                Token { kind: Whitespace, len: 1 }
-                Token { kind: BlockComment { doc_style: None, terminated: true }, len: 4 }
-                Token { kind: Whitespace, len: 1 }
-                Token { kind: BlockComment { doc_style: None, terminated: true }, len: 18 }
-                Token { kind: Whitespace, len: 1 }
-                Token { kind: BlockComment { doc_style: Some(Outer), terminated: true }, len: 22 }
-                Token { kind: Whitespace, len: 1 }
-                Token { kind: BlockComment { doc_style: Some(Inner), terminated: true }, len: 22 }
-                Token { kind: Whitespace, len: 1 }
-            "#]],
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: LineComment { doc_style: None }, len: 7 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: LineComment { doc_style: None }, len: 17 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: LineComment { doc_style: Some(Outer) }, len: 18 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: LineComment { doc_style: Some(Inner) }, len: 18 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 4 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 18 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: BlockComment { doc_style: Some(Outer), terminated: true }, len: 22 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: BlockComment { doc_style: Some(Inner), terminated: true }, len: 22 }
+            Token { kind: Whitespace, len: 1 }
+        "#]],
     )
 }
+
+#[test]
+fn nested_block_comments() {
+    check_lexing(
+        "/* /* */ */'a'",
+        expect![[r#"
+            Token { kind: BlockComment { doc_style: None, terminated: true }, len: 11 }
+            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
+        "#]],
+    )
+}
+
+#[test]
+fn characters() {
+    check_lexing(
+        "'a' ' ' '\\n'",
+        expect![[r#"
+            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 4 }, len: 4 }
+        "#]],
+    );
+}
+
+#[test]
+fn lifetime() {
+    check_lexing(
+        "'abc",
+        expect![[r#"
+            Token { kind: Lifetime { starts_with_number: false }, len: 4 }
+        "#]],
+    );
+}
+
+#[test]
+fn raw_string() {
+    check_lexing(
+        "r###\"\"#a\\b\x00c\"\"###",
+        expect![[r#"
+            Token { kind: Literal { kind: RawStr { n_hashes: 3, err: None }, suffix_start: 17 }, len: 17 }
+        "#]],
+    )
+}
+
+#[test]
+fn literal_suffixes() {
+    check_lexing(
+        r####"
+'a'
+b'a'
+"a"
+b"a"
+1234
+0b101
+0xABC
+1.0
+1.0e10
+2us
+r###"raw"###suffix
+br###"raw"###suffix
+"####,
+        expect![[r#"
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Char { terminated: true }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Byte { terminated: true }, suffix_start: 4 }, len: 4 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Str { terminated: true }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: ByteStr { terminated: true }, suffix_start: 4 }, len: 4 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 4 }, len: 4 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Int { base: Binary, empty_int: false }, suffix_start: 5 }, len: 5 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Int { base: Hexadecimal, empty_int: false }, suffix_start: 5 }, len: 5 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Float { base: Decimal, empty_exponent: false }, suffix_start: 3 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Float { base: Decimal, empty_exponent: false }, suffix_start: 6 }, len: 6 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: Int { base: Decimal, empty_int: false }, suffix_start: 1 }, len: 3 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: RawStr { n_hashes: 3, err: None }, suffix_start: 12 }, len: 18 }
+            Token { kind: Whitespace, len: 1 }
+            Token { kind: Literal { kind: RawByteStr { n_hashes: 3, err: None }, suffix_start: 13 }, len: 19 }
+            Token { kind: Whitespace, len: 1 }
+        "#]],
+    )
+}

From 518cac91902d34567ac8bfea3022f426a7de53f6 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Sat, 29 Aug 2020 11:00:59 +0200
Subject: [PATCH 2/2] Remove unused function

---
 Cargo.lock                            | 15 +--------------
 compiler/rustc_parse/src/lexer/mod.rs |  4 ----
 2 files changed, 1 insertion(+), 18 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index d549396943392..ffc1f0dec1d9a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -342,7 +342,7 @@ dependencies = [
 name = "cargo-miri"
 version = "0.1.0"
 dependencies = [
- "cargo_metadata 0.9.1",
+ "cargo_metadata 0.11.1",
  "directories",
  "rustc-workspace-hack",
  "rustc_version",
@@ -391,18 +391,6 @@ dependencies = [
  "serde_json",
 ]
 
-[[package]]
-name = "cargo_metadata"
-version = "0.9.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "46e3374c604fb39d1a2f35ed5e4a4e30e60d01fab49446e08f1b3e9a90aef202"
-dependencies = [
- "semver 0.9.0",
- "serde",
- "serde_derive",
- "serde_json",
-]
-
 [[package]]
 name = "cargo_metadata"
 version = "0.11.1"
@@ -1991,7 +1979,6 @@ dependencies = [
 name = "miri"
 version = "0.1.0"
 dependencies = [
- "byteorder",
  "colored",
  "compiletest_rs",
  "env_logger 0.7.1",
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index a65d344681924..c4ef35bc30c70 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -439,10 +439,6 @@ impl<'a> StringReader<'a> {
         (lit_kind, id)
     }
 
-    pub fn pos(&self) -> BytePos {
-        self.pos
-    }
-
     #[inline]
     fn src_index(&self, pos: BytePos) -> usize {
         (pos - self.start_pos).to_usize()