From ada55eb48f2afd6899b829a0b6ccb8c16afdffad Mon Sep 17 00:00:00 2001 From: lardieri Date: Mon, 29 Aug 2022 00:04:34 -0700 Subject: [PATCH 1/3] Fix serialization of embedded quotes. Embedded quotes in values must be doubled, and the entire value surrounded with quotes. --- SwiftCSV.xcodeproj/project.pbxproj | 10 +++++++++- SwiftCSV/Serializer.swift | 12 +++++++----- SwiftCSVTests/QuotedTests.swift | 30 ++++++++++++++++++++++++++++++ SwiftCSVTests/URLTests.swift | 8 ++++---- SwiftCSVTests/wonderland.csv | 4 ++++ 5 files changed, 54 insertions(+), 10 deletions(-) create mode 100644 SwiftCSVTests/wonderland.csv diff --git a/SwiftCSV.xcodeproj/project.pbxproj b/SwiftCSV.xcodeproj/project.pbxproj index 53bc7f4..0e01ec7 100644 --- a/SwiftCSV.xcodeproj/project.pbxproj +++ b/SwiftCSV.xcodeproj/project.pbxproj @@ -86,6 +86,9 @@ DFAD8B8028BC8B6F0042BB56 /* Serializer.swift in Sources */ = {isa = PBXBuildFile; fileRef = DFAD8B7A28B601EB0042BB56 /* Serializer.swift */; }; DFAD8B8128BC8B700042BB56 /* Serializer.swift in Sources */ = {isa = PBXBuildFile; fileRef = DFAD8B7A28B601EB0042BB56 /* Serializer.swift */; }; DFAD8B8228BC8B710042BB56 /* Serializer.swift in Sources */ = {isa = PBXBuildFile; fileRef = DFAD8B7A28B601EB0042BB56 /* Serializer.swift */; }; + DFAD8B8428BC91D10042BB56 /* wonderland.csv in Resources */ = {isa = PBXBuildFile; fileRef = DFAD8B8328BC91D10042BB56 /* wonderland.csv */; }; + DFAD8B8528BC91D10042BB56 /* wonderland.csv in Resources */ = {isa = PBXBuildFile; fileRef = DFAD8B8328BC91D10042BB56 /* wonderland.csv */; }; + DFAD8B8628BC91D10042BB56 /* wonderland.csv in Resources */ = {isa = PBXBuildFile; fileRef = DFAD8B8328BC91D10042BB56 /* wonderland.csv */; }; E46085921CCB1E8F00385286 /* large.csv in Resources */ = {isa = PBXBuildFile; fileRef = E46085911CCB1E8F00385286 /* large.csv */; }; E46085941CCB1F5C00385286 /* PerformanceTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = E46085931CCB1F5C00385286 /* PerformanceTest.swift */; }; F5C19F502283243C00920B06 /* ResourceHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = F5C19F4F2283243C00920B06 /* ResourceHelper.swift */; }; @@ -158,6 +161,7 @@ BE9B02D71CBE57B8009FE424 /* Parser.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Parser.swift; sourceTree = ""; }; DF94FE452898F3A3008FD3F9 /* utf8_with_bom.csv */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = utf8_with_bom.csv; sourceTree = ""; }; DFAD8B7A28B601EB0042BB56 /* Serializer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Serializer.swift; sourceTree = ""; }; + DFAD8B8328BC91D10042BB56 /* wonderland.csv */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = wonderland.csv; sourceTree = ""; }; E46085911CCB1E8F00385286 /* large.csv */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = large.csv; sourceTree = ""; }; E46085931CCB1F5C00385286 /* PerformanceTest.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PerformanceTest.swift; sourceTree = ""; }; F5C19F4F2283243C00920B06 /* ResourceHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ResourceHelper.swift; sourceTree = ""; }; @@ -286,10 +290,11 @@ BE06B67E1CB72680009578CC /* Res */ = { isa = PBXGroup; children = ( - DF94FE452898F3A3008FD3F9 /* utf8_with_bom.csv */, BE06B67C1CB7267B009578CC /* empty_fields.csv */, BE06B6811CB7287F009578CC /* quotes.csv */, E46085911CCB1E8F00385286 /* large.csv */, + DF94FE452898F3A3008FD3F9 /* utf8_with_bom.csv */, + DFAD8B8328BC91D10042BB56 /* wonderland.csv */, F5C19F4F2283243C00920B06 /* ResourceHelper.swift */, ); name = Res; @@ -536,6 +541,7 @@ isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; files = ( + DFAD8B8428BC91D10042BB56 /* wonderland.csv in Resources */, DF94FE462898F3A3008FD3F9 /* utf8_with_bom.csv in Resources */, BE06B67D1CB7267B009578CC /* empty_fields.csv in Resources */, BE06B6821CB7287F009578CC /* quotes.csv in Resources */, @@ -554,6 +560,7 @@ isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; files = ( + DFAD8B8528BC91D10042BB56 /* wonderland.csv in Resources */, DF94FE472898F3A3008FD3F9 /* utf8_with_bom.csv in Resources */, 5FB74BEA1CCB9325009DDBF1 /* empty_fields.csv in Resources */, 5FB74BEB1CCB9325009DDBF1 /* quotes.csv in Resources */, @@ -572,6 +579,7 @@ isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; files = ( + DFAD8B8628BC91D10042BB56 /* wonderland.csv in Resources */, DF94FE482898F3A3008FD3F9 /* utf8_with_bom.csv in Resources */, 5FB74BED1CCB932B009DDBF1 /* empty_fields.csv in Resources */, 5FB74BEE1CCB932B009DDBF1 /* quotes.csv in Resources */, diff --git a/SwiftCSV/Serializer.swift b/SwiftCSV/Serializer.swift index 603694c..19512ce 100644 --- a/SwiftCSV/Serializer.swift +++ b/SwiftCSV/Serializer.swift @@ -34,13 +34,15 @@ enum Serializer { fileprivate extension String { + static let quote = "\"" + func enquoted(whenContaining separator: String) -> String { - // Add quotes if value contains a delimiter - if self.contains(separator) { - return "\"\(self)\"" + // If value contains a delimiter or quotes, double any embedded quotes and surround with quotes. + if self.contains(separator) || self.contains(Self.quote) { + return Self.quote + self.replacingOccurrences(of: Self.quote, with: Self.quote + Self.quote) + Self.quote + } else { + return self } - - return self } } diff --git a/SwiftCSVTests/QuotedTests.swift b/SwiftCSVTests/QuotedTests.swift index 94f2397..8fea50b 100644 --- a/SwiftCSVTests/QuotedTests.swift +++ b/SwiftCSVTests/QuotedTests.swift @@ -33,4 +33,34 @@ class QuotedTests: XCTestCase { "age": "8" ]) } + + func testEmbeddedQuotes() throws { + let csvURL = ResourceHelper.url(forResource: "wonderland", withExtension: "csv")! + csv = try CSV(url: csvURL) + + /* + The test file: + + Character,Quote + White Rabbit,"""Where shall I begin, please your Majesty?"" he asked." + King,"""Begin at the beginning,"" the King said gravely, ""and go on till you come to the end: then stop.""" + March Hare,"""Do you mean that you think you can find out the answer to it?"" said the March Hare." + + Notice there are no commas (delimiters) in the 3rd line. + */ + + let expected = [ + [ "Character" : "White Rabbit" , "Quote" : #""Where shall I begin, please your Majesty?" he asked."# ], + [ "Character" : "King" , "Quote" : #""Begin at the beginning," the King said gravely, "and go on till you come to the end: then stop.""# ], + [ "Character" : "March Hare" , "Quote" : #""Do you mean that you think you can find out the answer to it?" said the March Hare."# ] + ] + + for (index, row) in csv.rows.enumerated() { + XCTAssertEqual(expected[index], row) + } + + let serialized = csv.serialized + let read = try String(contentsOf: csvURL, encoding: .utf8) + XCTAssertEqual(serialized, read) + } } diff --git a/SwiftCSVTests/URLTests.swift b/SwiftCSVTests/URLTests.swift index 6293efe..5eb67ad 100644 --- a/SwiftCSVTests/URLTests.swift +++ b/SwiftCSVTests/URLTests.swift @@ -61,9 +61,9 @@ class URLTests: XCTestCase { } } - func testUTF8() { + func testUTF8() throws { let csvURL = ResourceHelper.url(forResource: "utf8_with_bom", withExtension: "csv")! - csv = try! CSV(url: csvURL) + csv = try CSV(url: csvURL) XCTAssertFalse(csv.header.first!.hasPrefix("\u{FEFF}")) @@ -80,9 +80,9 @@ class URLTests: XCTestCase { } } - func testUTF8Delimited() { + func testUTF8Delimited() throws { let csvURL = ResourceHelper.url(forResource: "utf8_with_bom", withExtension: "csv")! - csv = try! CSV(url: csvURL, delimiter: .comma) + csv = try CSV(url: csvURL, delimiter: .comma) XCTAssertFalse(csv.header.first!.hasPrefix("\u{FEFF}")) diff --git a/SwiftCSVTests/wonderland.csv b/SwiftCSVTests/wonderland.csv new file mode 100644 index 0000000..377556f --- /dev/null +++ b/SwiftCSVTests/wonderland.csv @@ -0,0 +1,4 @@ +Character,Quote +White Rabbit,"""Where shall I begin, please your Majesty?"" he asked." +King,"""Begin at the beginning,"" the King said gravely, ""and go on till you come to the end: then stop.""" +March Hare,"""Do you mean that you think you can find out the answer to it?"" said the March Hare." \ No newline at end of file From f465fca5d59a88f8c1f052157be34478a74aac80 Mon Sep 17 00:00:00 2001 From: lardieri Date: Tue, 30 Aug 2022 00:22:09 -0700 Subject: [PATCH 2/3] Link to RFC. Update the changelog. --- CHANGELOG.md | 4 ++++ SwiftCSVTests/QuotedTests.swift | 1 + 2 files changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cea1f24..10df561 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,10 @@ Bugfixes: - Strip byte order mark from Strings when importing so they don't become part of imported content's cells. See #97 for discussion. (#103) -- @lardieri +- Respect alternate delimiters when serializing the CSV. + See #102 for discussion. (#107) -- @lardieri +- Escape any double-quotes embedded inside the field values when serializing the CSV. + See #111 for discussion. -- @lardieri Other: diff --git a/SwiftCSVTests/QuotedTests.swift b/SwiftCSVTests/QuotedTests.swift index 8fea50b..6fdddf6 100644 --- a/SwiftCSVTests/QuotedTests.swift +++ b/SwiftCSVTests/QuotedTests.swift @@ -47,6 +47,7 @@ class QuotedTests: XCTestCase { March Hare,"""Do you mean that you think you can find out the answer to it?"" said the March Hare." Notice there are no commas (delimiters) in the 3rd line. + For more information, see https://www.rfc-editor.org/rfc/rfc4180.html */ let expected = [ From b306b250f10ae27f042d303a67fc823338364981 Mon Sep 17 00:00:00 2001 From: lardieri Date: Tue, 30 Aug 2022 00:38:03 -0700 Subject: [PATCH 3/3] Another link to RFC. --- SwiftCSV/Serializer.swift | 1 + 1 file changed, 1 insertion(+) diff --git a/SwiftCSV/Serializer.swift b/SwiftCSV/Serializer.swift index 19512ce..ebb98ea 100644 --- a/SwiftCSV/Serializer.swift +++ b/SwiftCSV/Serializer.swift @@ -38,6 +38,7 @@ fileprivate extension String { func enquoted(whenContaining separator: String) -> String { // If value contains a delimiter or quotes, double any embedded quotes and surround with quotes. + // For more information, see https://www.rfc-editor.org/rfc/rfc4180.html if self.contains(separator) || self.contains(Self.quote) { return Self.quote + self.replacingOccurrences(of: Self.quote, with: Self.quote + Self.quote) + Self.quote } else {