From 08202fe94e73e00fff5204d1db36203b79fb582d Mon Sep 17 00:00:00 2001 From: Omri Hazan Date: Mon, 9 Dec 2019 14:44:51 -0800 Subject: [PATCH 1/2] Use LF directly in CSVLikeParser --- .../io/chrisdavenport/cormorant/parser/CSVLikeParser.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/parser/src/main/scala/io/chrisdavenport/cormorant/parser/CSVLikeParser.scala b/modules/parser/src/main/scala/io/chrisdavenport/cormorant/parser/CSVLikeParser.scala index da2817a0..1045f81e 100644 --- a/modules/parser/src/main/scala/io/chrisdavenport/cormorant/parser/CSVLikeParser.scala +++ b/modules/parser/src/main/scala/io/chrisdavenport/cormorant/parser/CSVLikeParser.scala @@ -87,7 +87,7 @@ abstract class CSVLikeParser(val separator: Char) { // Genuine CRLF or a Line Feed which is translated to a CRLF val PERMISSIVE_CRLF: Parser[(Char, Char)] = - (CRLF | char(lf).map((cr, _))).named("PERMISSIVE_CRLF") + (CRLF | LF.map((cr, _))).named("PERMISSIVE_CRLF") // COMMA = %x2C val SEPARATOR: Parser[Char] = char(separator).named("SEPARATOR") From afee2976ad289bfc9cd89ae6abe484f26b2cc9f9 Mon Sep 17 00:00:00 2001 From: Omri Hazan Date: Mon, 9 Dec 2019 14:45:44 -0800 Subject: [PATCH 2/2] Add support for CR-only line breaks in CSVLikeParser --- .../chrisdavenport/cormorant/parser/CSVLikeParser.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/parser/src/main/scala/io/chrisdavenport/cormorant/parser/CSVLikeParser.scala b/modules/parser/src/main/scala/io/chrisdavenport/cormorant/parser/CSVLikeParser.scala index 1045f81e..2820d671 100644 --- a/modules/parser/src/main/scala/io/chrisdavenport/cormorant/parser/CSVLikeParser.scala +++ b/modules/parser/src/main/scala/io/chrisdavenport/cormorant/parser/CSVLikeParser.scala @@ -10,7 +10,9 @@ import cats.implicits._ * https://tools.ietf.org/html/rfc4180 * * Deviations from the specification, here I have chosen to use a - * permissive CRLF that will accept a CRLF or a LF. 
+ * permissive CRLF that will accept a CRLF, LF, or CR. + * Note that a bare CR is not part of the original spec, but in rare + * cases CSV files use it as the line delimiter. * * The important details are as follows * 1. Each record is located on a separate line, delimited by a line @@ -85,9 +87,9 @@ abstract class CSVLikeParser(val separator: Char) { // CRLF = CR LF ;as per section 6.1 of RFC 2234 [2] val CRLF: Parser[(Char, Char)] = CR ~ LF - // Genuine CRLF or a Line Feed which is translated to a CRLF + // Genuine CRLF, or a lone LF or CR, either of which is translated to a CRLF val PERMISSIVE_CRLF: Parser[(Char, Char)] = - (CRLF | LF.map((cr, _))).named("PERMISSIVE_CRLF") + (CRLF | LF.map((cr, _)) | CR.map((_, lf))).named("PERMISSIVE_CRLF") // COMMA = %x2C val SEPARATOR: Parser[Char] = char(separator).named("SEPARATOR")