Skip to content

Commit b80ef05

Browse files
committed
Handle unprocessable whitespace-related unicode characters
1 parent 2cb34ec commit b80ef05

File tree

2 files changed

+36
-17
lines changed

2 files changed

+36
-17
lines changed

Sources/SwiftFormat/PrettyPrint/WhitespaceLinter.swift

+13 -17
Original file line number | Diff line number | Diff line change
@@ -312,13 +312,14 @@ public class WhitespaceLinter {
312312
formattedRun: ArraySlice<UTF8.CodeUnit>
313313
) {
314314
guard userRun != formattedRun else { return }
315-
315+
let userString = String(decoding: userRun, as: UTF8.self)
316+
let formattedString = String(decoding: formattedRun, as: UTF8.self)
316317
// This assumes tabs will always be forbidden for inter-token spacing (but not for leading
317318
// indentation).
318-
if userRun.contains(utf8Tab) {
319+
if userString.contains("\t") {
319320
diagnose(.spacingCharError, category: .spacingCharacter, utf8Offset: userIndex)
320-
} else if formattedRun.count != userRun.count {
321-
let delta = formattedRun.count - userRun.count
321+
} else if formattedString.count != userString.count {
322+
let delta = formattedString.count - userString.count
322323
diagnose(.spacingError(delta), category: .spacing, utf8Offset: userIndex)
323324
}
324325
}
@@ -339,20 +340,15 @@ public class WhitespaceLinter {
339340
startingAt offset: Int,
340341
in data: [UTF8.CodeUnit]
341342
) -> ArraySlice<UTF8.CodeUnit> {
342-
func isWhitespace(_ char: UTF8.CodeUnit) -> Bool {
343-
switch char {
344-
case UInt8(ascii: " "), UInt8(ascii: "\n"), UInt8(ascii: "\t"), UInt8(ascii: "\r"), /*VT*/ 0x0B, /*FF*/ 0x0C:
345-
return true
346-
default:
347-
return false
348-
}
349-
}
350-
guard
351-
let whitespaceEnd = data[offset...].firstIndex(where: { !isWhitespace($0) })
352-
else {
353-
return data[offset..<data.endIndex]
343+
// To ensure consistency with PrettyPrinter, the data is converted to a String
344+
// before processing whitespace.
345+
let substring = String(decoding: data[offset...], as: UTF8.self)
346+
var stringIndex = substring.startIndex
347+
while stringIndex < substring.endIndex, substring[stringIndex].isWhitespace {
348+
substring.formIndex(after: &stringIndex)
354349
}
355-
return data[offset..<whitespaceEnd]
350+
let utf8Count = substring.utf8.distance(from: substring.startIndex, to: stringIndex)
351+
return data[offset..<offset + utf8Count]
356352
}
357353

358354
/// Returns the code unit at the given index, or nil if the index is the end of the data.

Tests/SwiftFormatTests/PrettyPrint/WhitespaceLintTests.swift

+23
Original file line number | Diff line number | Diff line change
@@ -255,4 +255,27 @@ final class WhitespaceLintTests: WhitespaceTestCase {
255255
]
256256
)
257257
}
258+
259+
func testUnexpectedUnicodeCharacters() {
260+
assertWhitespaceLint(
261+
input: """
262+
// Hello World1️⃣\u{2028}
263+
// Hello2️⃣\u{20}\u{2028}World
264+
// Hello World3️⃣\u{2028}\u{2029}\u{2029}
265+
// Hello World4️⃣\u{20}\u{20}\u{20}\u{2028}
266+
""",
267+
expected: """
268+
// Hello World
269+
// Hello World
270+
// Hello World
271+
// Hello World
272+
""",
273+
findings: [
274+
FindingSpec("1️⃣", message: "remove trailing whitespace"),
275+
FindingSpec("2️⃣", message: "remove 1 space"),
276+
FindingSpec("3️⃣", message: "remove trailing whitespace"),
277+
FindingSpec("4️⃣", message: "remove 4 spaces"),
278+
]
279+
)
280+
}
258281
}

0 commit comments

Comments
 (0)