Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix serialization of embedded quotes. #111

Merged
merged 3 commits into from
Aug 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ Bugfixes:

- Strip byte order mark from Strings when importing so they don't become part of imported content's cells.
See #97 for discussion. (#103) -- @lardieri
- Respect alternate delimiters when serializing the CSV.
See #102 for discussion. (#107) -- @lardieri
- Escape any double-quotes embedded inside the field values when serializing the CSV.
See #111 for discussion. -- @lardieri

Other:

Expand Down
10 changes: 9 additions & 1 deletion SwiftCSV.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@
DFAD8B8028BC8B6F0042BB56 /* Serializer.swift in Sources */ = {isa = PBXBuildFile; fileRef = DFAD8B7A28B601EB0042BB56 /* Serializer.swift */; };
DFAD8B8128BC8B700042BB56 /* Serializer.swift in Sources */ = {isa = PBXBuildFile; fileRef = DFAD8B7A28B601EB0042BB56 /* Serializer.swift */; };
DFAD8B8228BC8B710042BB56 /* Serializer.swift in Sources */ = {isa = PBXBuildFile; fileRef = DFAD8B7A28B601EB0042BB56 /* Serializer.swift */; };
DFAD8B8428BC91D10042BB56 /* wonderland.csv in Resources */ = {isa = PBXBuildFile; fileRef = DFAD8B8328BC91D10042BB56 /* wonderland.csv */; };
DFAD8B8528BC91D10042BB56 /* wonderland.csv in Resources */ = {isa = PBXBuildFile; fileRef = DFAD8B8328BC91D10042BB56 /* wonderland.csv */; };
DFAD8B8628BC91D10042BB56 /* wonderland.csv in Resources */ = {isa = PBXBuildFile; fileRef = DFAD8B8328BC91D10042BB56 /* wonderland.csv */; };
E46085921CCB1E8F00385286 /* large.csv in Resources */ = {isa = PBXBuildFile; fileRef = E46085911CCB1E8F00385286 /* large.csv */; };
E46085941CCB1F5C00385286 /* PerformanceTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = E46085931CCB1F5C00385286 /* PerformanceTest.swift */; };
F5C19F502283243C00920B06 /* ResourceHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = F5C19F4F2283243C00920B06 /* ResourceHelper.swift */; };
Expand Down Expand Up @@ -158,6 +161,7 @@
BE9B02D71CBE57B8009FE424 /* Parser.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Parser.swift; sourceTree = "<group>"; };
DF94FE452898F3A3008FD3F9 /* utf8_with_bom.csv */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = utf8_with_bom.csv; sourceTree = "<group>"; };
DFAD8B7A28B601EB0042BB56 /* Serializer.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Serializer.swift; sourceTree = "<group>"; };
DFAD8B8328BC91D10042BB56 /* wonderland.csv */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = wonderland.csv; sourceTree = "<group>"; };
E46085911CCB1E8F00385286 /* large.csv */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = large.csv; sourceTree = "<group>"; };
E46085931CCB1F5C00385286 /* PerformanceTest.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PerformanceTest.swift; sourceTree = "<group>"; };
F5C19F4F2283243C00920B06 /* ResourceHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ResourceHelper.swift; sourceTree = "<group>"; };
Expand Down Expand Up @@ -286,10 +290,11 @@
BE06B67E1CB72680009578CC /* Res */ = {
isa = PBXGroup;
children = (
DF94FE452898F3A3008FD3F9 /* utf8_with_bom.csv */,
BE06B67C1CB7267B009578CC /* empty_fields.csv */,
BE06B6811CB7287F009578CC /* quotes.csv */,
E46085911CCB1E8F00385286 /* large.csv */,
DF94FE452898F3A3008FD3F9 /* utf8_with_bom.csv */,
DFAD8B8328BC91D10042BB56 /* wonderland.csv */,
F5C19F4F2283243C00920B06 /* ResourceHelper.swift */,
);
name = Res;
Expand Down Expand Up @@ -536,6 +541,7 @@
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
DFAD8B8428BC91D10042BB56 /* wonderland.csv in Resources */,
DF94FE462898F3A3008FD3F9 /* utf8_with_bom.csv in Resources */,
BE06B67D1CB7267B009578CC /* empty_fields.csv in Resources */,
BE06B6821CB7287F009578CC /* quotes.csv in Resources */,
Expand All @@ -554,6 +560,7 @@
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
DFAD8B8528BC91D10042BB56 /* wonderland.csv in Resources */,
DF94FE472898F3A3008FD3F9 /* utf8_with_bom.csv in Resources */,
5FB74BEA1CCB9325009DDBF1 /* empty_fields.csv in Resources */,
5FB74BEB1CCB9325009DDBF1 /* quotes.csv in Resources */,
Expand All @@ -572,6 +579,7 @@
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
DFAD8B8628BC91D10042BB56 /* wonderland.csv in Resources */,
DF94FE482898F3A3008FD3F9 /* utf8_with_bom.csv in Resources */,
5FB74BED1CCB932B009DDBF1 /* empty_fields.csv in Resources */,
5FB74BEE1CCB932B009DDBF1 /* quotes.csv in Resources */,
Expand Down
13 changes: 8 additions & 5 deletions SwiftCSV/Serializer.swift
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,16 @@ enum Serializer {

fileprivate extension String {

    /// The double-quote character used both to enclose a field and, when doubled,
    /// to escape a double-quote embedded inside an enclosed field.
    static let quote = "\""

    /// Returns the receiver formatted as a single CSV field.
    ///
    /// The value is returned unchanged unless it contains `separator`, a double-quote,
    /// or a line break. In those cases every embedded double-quote is doubled and the
    /// result is enclosed in double-quotes.
    /// For more information, see https://www.rfc-editor.org/rfc/rfc4180.html
    ///
    /// - Parameter separator: The field delimiter in use for the serialized output.
    /// - Returns: The receiver, enclosed and escaped when required, otherwise as-is.
    func enquoted(whenContaining separator: String) -> String {
        // A line break is tested per Character so that "\r\n" (a single grapheme
        // cluster in Swift) is detected as well as a lone "\n" or "\r".
        let needsQuoting = contains(separator)
            || contains(Self.quote)
            || contains(where: { $0.isNewline })

        guard needsQuoting else {
            return self
        }

        let escaped = replacingOccurrences(of: Self.quote, with: Self.quote + Self.quote)
        return Self.quote + escaped + Self.quote
    }

}
31 changes: 31 additions & 0 deletions SwiftCSVTests/QuotedTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,35 @@ class QuotedTests: XCTestCase {
"age": "8"
])
}

/// Verifies that double-quotes embedded in field values survive a parse/serialize
/// round trip: the fixture is parsed, compared against the expected unescaped values,
/// then serialized again and compared against the file's original text.
func testEmbeddedQuotes() throws {
    // Fail the test (instead of crashing the test run) if the fixture is missing.
    let csvURL = try XCTUnwrap(ResourceHelper.url(forResource: "wonderland", withExtension: "csv"))
    csv = try CSV(url: csvURL)

    /*
     The test file:

     Character,Quote
     White Rabbit,"""Where shall I begin, please your Majesty?"" he asked."
     King,"""Begin at the beginning,"" the King said gravely, ""and go on till you come to the end: then stop."""
     March Hare,"""Do you mean that you think you can find out the answer to it?"" said the March Hare."

     Notice that the Quote field of the last row (March Hare) contains no commas (delimiters);
     it has to be enclosed in quotes solely because of its embedded double-quotes.
     For more information, see https://www.rfc-editor.org/rfc/rfc4180.html
     */

    let expected = [
        [ "Character" : "White Rabbit" , "Quote" : #""Where shall I begin, please your Majesty?" he asked."# ],
        [ "Character" : "King" , "Quote" : #""Begin at the beginning," the King said gravely, "and go on till you come to the end: then stop.""# ],
        [ "Character" : "March Hare" , "Quote" : #""Do you mean that you think you can find out the answer to it?" said the March Hare."# ]
    ]

    // Check the row count first: a row-by-row comparison alone would pass vacuously
    // if parsing produced fewer rows than expected.
    XCTAssertEqual(csv.rows.count, expected.count)
    for (expectedRow, row) in zip(expected, csv.rows) {
        XCTAssertEqual(expectedRow, row)
    }

    // Serializing the parsed content must reproduce the fixture's text exactly.
    let serialized = csv.serialized
    let read = try String(contentsOf: csvURL, encoding: .utf8)
    XCTAssertEqual(serialized, read)
}
}
8 changes: 4 additions & 4 deletions SwiftCSVTests/URLTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -61,9 +61,9 @@ class URLTests: XCTestCase {
}
}

func testUTF8() {
func testUTF8() throws {
let csvURL = ResourceHelper.url(forResource: "utf8_with_bom", withExtension: "csv")!
csv = try! CSV(url: csvURL)
csv = try CSV(url: csvURL)
lardieri marked this conversation as resolved.
Show resolved Hide resolved

XCTAssertFalse(csv.header.first!.hasPrefix("\u{FEFF}"))

Expand All @@ -80,9 +80,9 @@ class URLTests: XCTestCase {
}
}

func testUTF8Delimited() {
func testUTF8Delimited() throws {
let csvURL = ResourceHelper.url(forResource: "utf8_with_bom", withExtension: "csv")!
csv = try! CSV(url: csvURL, delimiter: .comma)
csv = try CSV(url: csvURL, delimiter: .comma)

XCTAssertFalse(csv.header.first!.hasPrefix("\u{FEFF}"))

Expand Down
4 changes: 4 additions & 0 deletions SwiftCSVTests/wonderland.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Character,Quote
White Rabbit,"""Where shall I begin, please your Majesty?"" he asked."
King,"""Begin at the beginning,"" the King said gravely, ""and go on till you come to the end: then stop."""
March Hare,"""Do you mean that you think you can find out the answer to it?"" said the March Hare."