From e942337c5d11a03e9dabac63e549487bd1ce7285 Mon Sep 17 00:00:00 2001 From: TTOzzi Date: Sun, 14 May 2023 17:00:32 +0900 Subject: [PATCH 1/7] Add logic to check if previous token's trailing trivia contains newline characters --- Sources/SwiftParser/Lexer/Cursor.swift | 36 +++++++++++++++++++++----- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/Sources/SwiftParser/Lexer/Cursor.swift b/Sources/SwiftParser/Lexer/Cursor.swift index d2629341d7e..cb0484cf48d 100644 --- a/Sources/SwiftParser/Lexer/Cursor.swift +++ b/Sources/SwiftParser/Lexer/Cursor.swift @@ -150,6 +150,20 @@ extension Lexer.Cursor { case .inRegexLiteral: return false } } + + /// Returns whether the lexer is currently parsing the multiline string. + var isParsingMultilineString: Bool { + switch self { + case .normal, .preferRegexOverBinaryOperator: return false + case .afterRawStringDelimiter: return false + case .inStringLiteral(kind: let stringLiteralKind, delimiterLength: _): return stringLiteralKind == .multiLine + case .afterStringLiteral: return false + case .afterClosingStringQuote: return false + case .inStringInterpolationStart: return false + case .inStringInterpolation: return false + case .inRegexLiteral: return false + } + } } /// A data structure that holds the state stack entries in the lexer. It is @@ -242,6 +256,9 @@ extension Lexer { /// If we have already lexed a token, the kind of the previously lexed token var previousTokenKind: RawTokenKind? + + /// If we have already lexed a token, the `NewlinePresence` of the previously lexed token + var previousTokenNewlinePresence: NewlinePresence? /// If the `previousTokenKind` is `.keyword`, the keyword kind. Otherwise /// `nil`. @@ -434,23 +451,30 @@ extension Lexer.Cursor { if let stateTransition = result.stateTransition { self.stateStack.perform(stateTransition: stateTransition, stateAllocator: stateAllocator) } - + + var flags = result.flags + if newlineInLeadingTrivia == .present { + flags.insert(.isAtStartOfLine) + } + if let previousTokenNewlinePresence, previousTokenNewlinePresence == .present, + !currentState.isParsingMultilineString { + flags.insert(.isAtStartOfLine) + } + // Trailing trivia. let trailingTriviaStart = self if let trailingTriviaMode = result.trailingTriviaLexingMode ?? currentState.trailingTriviaLexingMode(cursor: self) { let triviaResult = self.lexTrivia(mode: trailingTriviaMode) + self.previousTokenNewlinePresence = triviaResult.newlinePresence diagnostic = TokenDiagnostic(combining: diagnostic, triviaResult.error?.tokenDiagnostic(tokenStart: cursor)) + } else { + self.previousTokenNewlinePresence = nil } if self.currentState.shouldPopStateWhenReachingNewlineInTrailingTrivia && self.is(at: "\r", "\n") { self.stateStack.perform(stateTransition: .pop, stateAllocator: stateAllocator) } - var flags = result.flags - if newlineInLeadingTrivia == .present { - flags.insert(.isAtStartOfLine) - } - diagnostic = TokenDiagnostic(combining: diagnostic, result.error?.tokenDiagnostic(tokenStart: cursor)) let lexeme = Lexer.Lexeme( From 28a5fd36c64bb98269bc9f87eba5fb6a02231a44 Mon Sep 17 00:00:00 2001 From: TTOzzi Date: Sun, 14 May 2023 20:59:11 +0900 Subject: [PATCH 2/7] Add test case --- Tests/SwiftParserTest/StatementTests.swift | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/Tests/SwiftParserTest/StatementTests.swift b/Tests/SwiftParserTest/StatementTests.swift index 3e67fd23ee2..b9716281b9a 100644 --- a/Tests/SwiftParserTest/StatementTests.swift +++ b/Tests/SwiftParserTest/StatementTests.swift @@ -718,4 +718,23 @@ final class StatementTests: XCTestCase { ] ) } + + func testTrailingTriviaIncludesNewline() { + assertParse( + """ + let a = 2/* + */let b = 3 + """ + ) + + assertParse( + """ + let a = 2/* + + + + */let b = 3 + """ + ) + } } From 7c779431d68301df9542d791a564cdf7133eb13a Mon Sep 17 00:00:00 2001 From: TTOzzi Date: Mon, 15 May 2023 23:47:44 +0900 Subject: [PATCH 3/7] Modify parsing logic so that a newline in a multiline string also has the isAtStartOfLine flag --- Sources/SwiftParser/Lexer/Cursor.swift | 37 ++++++++------------------ Tests/SwiftParserTest/LexerTests.swift | 12 ++++----- 2 files changed, 17 insertions(+), 32 deletions(-) diff --git a/Sources/SwiftParser/Lexer/Cursor.swift b/Sources/SwiftParser/Lexer/Cursor.swift index cb0484cf48d..8ee70775279 100644 --- a/Sources/SwiftParser/Lexer/Cursor.swift +++ b/Sources/SwiftParser/Lexer/Cursor.swift @@ -150,20 +150,6 @@ extension Lexer.Cursor { case .inRegexLiteral: return false } } - - /// Returns whether the lexer is currently parsing the multiline string. - var isParsingMultilineString: Bool { - switch self { - case .normal, .preferRegexOverBinaryOperator: return false - case .afterRawStringDelimiter: return false - case .inStringLiteral(kind: let stringLiteralKind, delimiterLength: _): return stringLiteralKind == .multiLine - case .afterStringLiteral: return false - case .afterClosingStringQuote: return false - case .inStringInterpolationStart: return false - case .inStringInterpolation: return false - case .inRegexLiteral: return false - } - } } /// A data structure that holds the state stack entries in the lexer. It is @@ -420,6 +406,15 @@ extension Lexer.Cursor { } else { newlineInLeadingTrivia = .absent } + + var flags: Lexer.Lexeme.Flags = [] + if newlineInLeadingTrivia == .present { + flags.insert(.isAtStartOfLine) + } + if let previousTokenNewlinePresence, previousTokenNewlinePresence == .present { + flags.insert(.isAtStartOfLine) + } + self.previousTokenNewlinePresence = nil // Token text. let textStart = self @@ -452,23 +447,12 @@ extension Lexer.Cursor { self.stateStack.perform(stateTransition: stateTransition, stateAllocator: stateAllocator) } - var flags = result.flags - if newlineInLeadingTrivia == .present { - flags.insert(.isAtStartOfLine) - } - if let previousTokenNewlinePresence, previousTokenNewlinePresence == .present, - !currentState.isParsingMultilineString { - flags.insert(.isAtStartOfLine) - } - // Trailing trivia. let trailingTriviaStart = self if let trailingTriviaMode = result.trailingTriviaLexingMode ?? currentState.trailingTriviaLexingMode(cursor: self) { let triviaResult = self.lexTrivia(mode: trailingTriviaMode) self.previousTokenNewlinePresence = triviaResult.newlinePresence diagnostic = TokenDiagnostic(combining: diagnostic, triviaResult.error?.tokenDiagnostic(tokenStart: cursor)) - } else { - self.previousTokenNewlinePresence = nil } if self.currentState.shouldPopStateWhenReachingNewlineInTrailingTrivia && self.is(at: "\r", "\n") { @@ -479,7 +463,7 @@ extension Lexer.Cursor { let lexeme = Lexer.Lexeme( tokenKind: result.tokenKind, - flags: flags, + flags: result.flags.union(flags), diagnostic: diagnostic, start: leadingTriviaStart.pointer, leadingTriviaLength: leadingTriviaStart.distance(to: textStart), @@ -1914,6 +1898,7 @@ extension Lexer.Cursor { if character == UInt8(ascii: "\r") { _ = self.advance(matching: "\n") } + self.previousTokenNewlinePresence = .present return Lexer.Result(.stringSegment, error: error) } else { // Single line literals cannot span multiple lines. diff --git a/Tests/SwiftParserTest/LexerTests.swift b/Tests/SwiftParserTest/LexerTests.swift index 021e7d9f874..a081aa60fe3 100644 --- a/Tests/SwiftParserTest/LexerTests.swift +++ b/Tests/SwiftParserTest/LexerTests.swift @@ -1182,9 +1182,9 @@ public class LexerTests: XCTestCase { """#, lexemes: [ LexemeSpec(.multilineStringQuote, leading: " ", text: #"""""#, trailing: "\n"), - LexemeSpec(.stringSegment, text: " line 1\n"), - LexemeSpec(.stringSegment, text: " line 2\n"), - LexemeSpec(.stringSegment, text: " "), + LexemeSpec(.stringSegment, text: " line 1\n", flags: .isAtStartOfLine), + LexemeSpec(.stringSegment, text: " line 2\n", flags: .isAtStartOfLine), + LexemeSpec(.stringSegment, text: " ", flags: .isAtStartOfLine), LexemeSpec(.multilineStringQuote, text: #"""""#), ] ) @@ -1198,9 +1198,9 @@ public class LexerTests: XCTestCase { """#, lexemes: [ LexemeSpec(.multilineStringQuote, leading: " ", text: #"""""#, trailing: "\n"), - LexemeSpec(.stringSegment, text: " line 1 ", trailing: "\\\n"), - LexemeSpec(.stringSegment, text: " line 2\n"), - LexemeSpec(.stringSegment, text: " "), + LexemeSpec(.stringSegment, text: " line 1 ", trailing: "\\\n", flags: .isAtStartOfLine), + LexemeSpec(.stringSegment, text: " line 2\n", flags: .isAtStartOfLine), + LexemeSpec(.stringSegment, text: " ", flags: .isAtStartOfLine), LexemeSpec(.multilineStringQuote, text: #"""""#), ] ) From faa9402fd223ff4343c829a2f712309fd3e5307f Mon Sep 17 00:00:00 2001 From: TTOzzi Date: Tue, 16 May 2023 00:03:11 +0900 Subject: [PATCH 4/7] Rename the property that stores the previous parse value --- Sources/SwiftParser/Lexer/Cursor.swift | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Sources/SwiftParser/Lexer/Cursor.swift b/Sources/SwiftParser/Lexer/Cursor.swift index 8ee70775279..2d939f5d065 100644 --- a/Sources/SwiftParser/Lexer/Cursor.swift +++ b/Sources/SwiftParser/Lexer/Cursor.swift @@ -243,8 +243,8 @@ extension Lexer { /// If we have already lexed a token, the kind of the previously lexed token var previousTokenKind: RawTokenKind? - /// If we have already lexed a token, the `NewlinePresence` of the previously lexed token - var previousTokenNewlinePresence: NewlinePresence? + /// If we have already lexed a token, stores whether the previous lexeme‘s ending contains a newline. + var previousLexemeTrailingNewlinePresence: NewlinePresence? /// If the `previousTokenKind` is `.keyword`, the keyword kind. Otherwise /// `nil`. @@ -411,10 +411,10 @@ extension Lexer.Cursor { if newlineInLeadingTrivia == .present { flags.insert(.isAtStartOfLine) } - if let previousTokenNewlinePresence, previousTokenNewlinePresence == .present { + if let previousLexemeTrailingNewlinePresence, previousLexemeTrailingNewlinePresence == .present { flags.insert(.isAtStartOfLine) } - self.previousTokenNewlinePresence = nil + self.previousLexemeTrailingNewlinePresence = nil // Token text. let textStart = self @@ -451,7 +451,7 @@ extension Lexer.Cursor { let trailingTriviaStart = self if let trailingTriviaMode = result.trailingTriviaLexingMode ?? currentState.trailingTriviaLexingMode(cursor: self) { let triviaResult = self.lexTrivia(mode: trailingTriviaMode) - self.previousTokenNewlinePresence = triviaResult.newlinePresence + self.previousLexemeTrailingNewlinePresence = triviaResult.newlinePresence diagnostic = TokenDiagnostic(combining: diagnostic, triviaResult.error?.tokenDiagnostic(tokenStart: cursor)) } @@ -1898,7 +1898,7 @@ extension Lexer.Cursor { if character == UInt8(ascii: "\r") { _ = self.advance(matching: "\n") } - self.previousTokenNewlinePresence = .present + self.previousLexemeTrailingNewlinePresence = .present return Lexer.Result(.stringSegment, error: error) } else { // Single line literals cannot span multiple lines. From 34fd5532e0296704b9323657b66102760bf06da3 Mon Sep 17 00:00:00 2001 From: TTOzzi Date: Tue, 16 May 2023 01:40:58 +0900 Subject: [PATCH 5/7] Add logic to check for newline during token text lexing --- Sources/SwiftParser/Lexer/Cursor.swift | 39 +++++++++++++++----------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/Sources/SwiftParser/Lexer/Cursor.swift b/Sources/SwiftParser/Lexer/Cursor.swift index 2d939f5d065..033b1d41ec4 100644 --- a/Sources/SwiftParser/Lexer/Cursor.swift +++ b/Sources/SwiftParser/Lexer/Cursor.swift @@ -321,6 +321,8 @@ extension Lexer { /// If `tokenKind` is `.keyword`, the kind of keyword produced, otherwise /// `nil`. let keywordKind: Keyword? + /// Indicates whether the lexed token text contains a newline. + let newlinePresence: Lexer.Cursor.NewlinePresence private init( _ tokenKind: RawTokenKind, @@ -328,7 +330,8 @@ extension Lexer { error: Cursor.LexingDiagnostic?, stateTransition: StateTransition?, trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode?, - keywordKind: Keyword? + keywordKind: Keyword?, + newlinePresence: Lexer.Cursor.NewlinePresence ) { self.tokenKind = tokenKind self.flags = flags @@ -336,6 +339,7 @@ extension Lexer { self.stateTransition = stateTransition self.trailingTriviaLexingMode = trailingTriviaLexingMode self.keywordKind = keywordKind + self.newlinePresence = newlinePresence } /// Create a lexer result. Note that keywords should use `Result.keyword` @@ -345,7 +349,8 @@ extension Lexer { flags: Lexer.Lexeme.Flags = [], error: Cursor.LexingDiagnostic? = nil, stateTransition: StateTransition? = nil, - trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode? = nil + trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode? = nil, + newlinePresence: Lexer.Cursor.NewlinePresence = .absent ) { precondition(tokenKind != .keyword, "Use Result.keyword instead") self.init( @@ -354,7 +359,8 @@ extension Lexer { error: error, stateTransition: stateTransition, trailingTriviaLexingMode: trailingTriviaLexingMode, - keywordKind: nil + keywordKind: nil, + newlinePresence: newlinePresence ) } @@ -366,7 +372,8 @@ extension Lexer { error: nil, stateTransition: nil, trailingTriviaLexingMode: nil, - keywordKind: kind + keywordKind: kind, + newlinePresence: .absent ) } } @@ -406,15 +413,6 @@ extension Lexer.Cursor { } else { newlineInLeadingTrivia = .absent } - - var flags: Lexer.Lexeme.Flags = [] - if newlineInLeadingTrivia == .present { - flags.insert(.isAtStartOfLine) - } - if let previousLexemeTrailingNewlinePresence, previousLexemeTrailingNewlinePresence == .present { - flags.insert(.isAtStartOfLine) - } - self.previousLexemeTrailingNewlinePresence = nil // Token text. let textStart = self @@ -442,6 +440,16 @@ extension Lexer.Cursor { case .inRegexLiteral(let index, let lexemes): result = lexInRegexLiteral(lexemes.pointee[index...], existingPtr: lexemes) } + + var flags = result.flags + if newlineInLeadingTrivia == .present { + flags.insert(.isAtStartOfLine) + } + if let previousLexemeTrailingNewlinePresence, previousLexemeTrailingNewlinePresence == .present { + flags.insert(.isAtStartOfLine) + } + + self.previousLexemeTrailingNewlinePresence = result.newlinePresence if let stateTransition = result.stateTransition { self.stateStack.perform(stateTransition: stateTransition, stateAllocator: stateAllocator) @@ -463,7 +471,7 @@ extension Lexer.Cursor { let lexeme = Lexer.Lexeme( tokenKind: result.tokenKind, - flags: result.flags.union(flags), + flags: flags, diagnostic: diagnostic, start: leadingTriviaStart.pointer, leadingTriviaLength: leadingTriviaStart.distance(to: textStart), @@ -1898,8 +1906,7 @@ extension Lexer.Cursor { if character == UInt8(ascii: "\r") { _ = self.advance(matching: "\n") } - self.previousLexemeTrailingNewlinePresence = .present - return Lexer.Result(.stringSegment, error: error) + return Lexer.Result(.stringSegment, error: error, newlinePresence: .present) } else { // Single line literals cannot span multiple lines. // Terminate the string here and go back to normal lexing (instead of `afterStringLiteral`) From fc0f82932fefd7c6eaeb02e8b6357d8840214c0d Mon Sep 17 00:00:00 2001 From: TTOzzi Date: Tue, 16 May 2023 01:50:38 +0900 Subject: [PATCH 6/7] Formatting --- Sources/SwiftParser/Lexer/Cursor.swift | 8 +++---- Tests/SwiftParserTest/StatementTests.swift | 26 +++++++++++----------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Sources/SwiftParser/Lexer/Cursor.swift b/Sources/SwiftParser/Lexer/Cursor.swift index 033b1d41ec4..01fe8aac5ee 100644 --- a/Sources/SwiftParser/Lexer/Cursor.swift +++ b/Sources/SwiftParser/Lexer/Cursor.swift @@ -242,7 +242,7 @@ extension Lexer { /// If we have already lexed a token, the kind of the previously lexed token var previousTokenKind: RawTokenKind? - + /// If we have already lexed a token, stores whether the previous lexeme‘s ending contains a newline. var previousLexemeTrailingNewlinePresence: NewlinePresence? @@ -440,7 +440,7 @@ extension Lexer.Cursor { case .inRegexLiteral(let index, let lexemes): result = lexInRegexLiteral(lexemes.pointee[index...], existingPtr: lexemes) } - + var flags = result.flags if newlineInLeadingTrivia == .present { flags.insert(.isAtStartOfLine) @@ -448,13 +448,13 @@ extension Lexer.Cursor { if let previousLexemeTrailingNewlinePresence, previousLexemeTrailingNewlinePresence == .present { flags.insert(.isAtStartOfLine) } - + self.previousLexemeTrailingNewlinePresence = result.newlinePresence if let stateTransition = result.stateTransition { self.stateStack.perform(stateTransition: stateTransition, stateAllocator: stateAllocator) } - + // Trailing trivia. let trailingTriviaStart = self if let trailingTriviaMode = result.trailingTriviaLexingMode ?? currentState.trailingTriviaLexingMode(cursor: self) { diff --git a/Tests/SwiftParserTest/StatementTests.swift b/Tests/SwiftParserTest/StatementTests.swift index b9716281b9a..260a0be0462 100644 --- a/Tests/SwiftParserTest/StatementTests.swift +++ b/Tests/SwiftParserTest/StatementTests.swift @@ -718,23 +718,23 @@ final class StatementTests: XCTestCase { ] ) } - + func testTrailingTriviaIncludesNewline() { assertParse( - """ - let a = 2/* - */let b = 3 - """ + """ + let a = 2/* + */let b = 3 + """ ) - + assertParse( - """ - let a = 2/* - - - - */let b = 3 - """ + """ + let a = 2/* + + + + */let b = 3 + """ ) } } From 32f9b2559cf5f8700f3bffc0c454d381a40c95f0 Mon Sep 17 00:00:00 2001 From: TTOzzi Date: Tue, 16 May 2023 01:58:27 +0900 Subject: [PATCH 7/7] Rename newlinePresence property of the Lexer Result --- Sources/SwiftParser/Lexer/Cursor.swift | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Sources/SwiftParser/Lexer/Cursor.swift b/Sources/SwiftParser/Lexer/Cursor.swift index 01fe8aac5ee..87906484503 100644 --- a/Sources/SwiftParser/Lexer/Cursor.swift +++ b/Sources/SwiftParser/Lexer/Cursor.swift @@ -321,8 +321,8 @@ extension Lexer { /// If `tokenKind` is `.keyword`, the kind of keyword produced, otherwise /// `nil`. let keywordKind: Keyword? - /// Indicates whether the lexed token text contains a newline. - let newlinePresence: Lexer.Cursor.NewlinePresence + /// Indicates whether the end of the lexed token text contains a newline. + let trailingNewlinePresence: Lexer.Cursor.NewlinePresence private init( _ tokenKind: RawTokenKind, @@ -331,7 +331,7 @@ extension Lexer { stateTransition: StateTransition?, trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode?, keywordKind: Keyword?, - newlinePresence: Lexer.Cursor.NewlinePresence + trailingNewlinePresence: Lexer.Cursor.NewlinePresence ) { self.tokenKind = tokenKind self.flags = flags @@ -339,7 +339,7 @@ extension Lexer { self.stateTransition = stateTransition self.trailingTriviaLexingMode = trailingTriviaLexingMode self.keywordKind = keywordKind - self.newlinePresence = newlinePresence + self.trailingNewlinePresence = trailingNewlinePresence } /// Create a lexer result. Note that keywords should use `Result.keyword` @@ -350,7 +350,7 @@ extension Lexer { error: Cursor.LexingDiagnostic? = nil, stateTransition: StateTransition? = nil, trailingTriviaLexingMode: Lexer.Cursor.TriviaLexingMode? = nil, - newlinePresence: Lexer.Cursor.NewlinePresence = .absent + trailingNewlinePresence: Lexer.Cursor.NewlinePresence = .absent ) { precondition(tokenKind != .keyword, "Use Result.keyword instead") self.init( @@ -360,7 +360,7 @@ extension Lexer { stateTransition: stateTransition, trailingTriviaLexingMode: trailingTriviaLexingMode, keywordKind: nil, - newlinePresence: newlinePresence + trailingNewlinePresence: trailingNewlinePresence ) } @@ -373,7 +373,7 @@ extension Lexer { stateTransition: nil, trailingTriviaLexingMode: nil, keywordKind: kind, - newlinePresence: .absent + trailingNewlinePresence: .absent ) } } @@ -449,7 +449,7 @@ extension Lexer.Cursor { flags.insert(.isAtStartOfLine) } - self.previousLexemeTrailingNewlinePresence = result.newlinePresence + self.previousLexemeTrailingNewlinePresence = result.trailingNewlinePresence if let stateTransition = result.stateTransition { self.stateStack.perform(stateTransition: stateTransition, stateAllocator: stateAllocator) @@ -1906,7 +1906,7 @@ extension Lexer.Cursor { if character == UInt8(ascii: "\r") { _ = self.advance(matching: "\n") } - return Lexer.Result(.stringSegment, error: error, newlinePresence: .present) + return Lexer.Result(.stringSegment, error: error, trailingNewlinePresence: .present) } else { // Single line literals cannot span multiple lines. // Terminate the string here and go back to normal lexing (instead of `afterStringLiteral`)