diff --git a/pom.xml b/pom.xml index 7dbc571..631446b 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ be.nbb.rd picocsv - 1.1.0 + 1.2.0 jar picocsv @@ -32,13 +32,13 @@ junit junit - 4.12 + 4.13 test org.assertj assertj-core - 3.14.0 + 3.15.0 test @@ -98,8 +98,8 @@ - 1.18.10 - 1.22 + 1.18.12 + 1.23 @@ -214,18 +214,60 @@ - + - optional-checks + enforce-dependency-rules + + + !skipEnforceDependencyRules + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + 3.0.0-M3 + + + enforce + + + + 3.3.9 + + + + true + + + + + enforce + + + + + + + + + + + enforce-modern-api + + + !skipEnforceModernAPI + + org.gaul modernizer-maven-plugin - 2.0.0 + 2.1.0 1.8 - @@ -237,6 +279,20 @@ + + + + + + + enforce-code-coverage + + + !skipEnforceCodeCoverage + + + + org.jacoco jacoco-maven-plugin @@ -279,7 +335,7 @@ org.apache.maven.plugins maven-source-plugin - 3.2.0 + 3.2.1 verify @@ -312,7 +368,7 @@ org.apache.maven.plugins maven-source-plugin - 3.2.0 + 3.2.1 verify diff --git a/src/main/java/nbbrd/picocsv/Csv.java b/src/main/java/nbbrd/picocsv/Csv.java index 24d91af..9b1cb23 100644 --- a/src/main/java/nbbrd/picocsv/Csv.java +++ b/src/main/java/nbbrd/picocsv/Csv.java @@ -221,6 +221,64 @@ public Format build() { } } + /** + * Specifies the reader options. + */ + public static final class Parsing { + + public static final Parsing STRICT = new Parsing(false); + public static final Parsing LENIENT = new Parsing(true); + + private final boolean lenientSeparator; + + private Parsing(boolean lenientSeparator) { + this.lenientSeparator = lenientSeparator; + } + + /** + * Determine if the separator is parsed leniently or not. If set to + * true, the reader follows the same behavior as BufferedReader: a + * line is considered to be terminated by any one of a line feed ('\n'), + * a carriage return ('\r'), a carriage return followed immediately by a + * line feed, or by reaching the end-of-file (EOF). + * + * @return true if lenient parsing of separator, false otherwise + */ + public boolean isLenientSeparator() { + return lenientSeparator; + } + + @Override + public int hashCode() { + int hash = 7; + hash = 37 * hash + Boolean.hashCode(lenientSeparator); + return hash; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + final Parsing other = (Parsing) obj; + if (this.lenientSeparator != other.lenientSeparator) { + return false; + } + return true; + } + + @Override + public String toString() { + return "ReaderOptions{" + "lenientSeparator=" + lenientSeparator + '}'; + } + } + /** * Reads CSV files. */ @@ -238,9 +296,26 @@ public static final class Reader implements Closeable, CharSequence { * @throws IOException if an I/O error occurs */ public static Reader of(Path file, Charset encoding, Format format) throws IllegalArgumentException, IOException { + return of(file, encoding, format, Parsing.STRICT); + } + + /** + * Creates a new instance from a file. + * + * @param file a non-null file + * @param encoding a non-null encoding + * @param format a non-null format + * @param options non-null options + * @return a new CSV reader + * @throws IllegalArgumentException if the format contains an invalid + * combination of options + * @throws IOException if an I/O error occurs + */ + public static Reader of(Path file, Charset encoding, Format format, Parsing options) throws IllegalArgumentException, IOException { Objects.requireNonNull(file, "file"); Objects.requireNonNull(encoding, "encoding"); Objects.requireNonNull(format, "format"); + Objects.requireNonNull(options, "options"); if (!format.isValid()) { throw new IllegalArgumentException("format"); @@ -248,7 +323,7 @@ public static Reader of(Path file, Charset encoding, Format format) throws Illeg CharsetDecoder decoder = encoding.newDecoder(); BufferSizes sizes = BufferSizes.of(file, decoder); - return make(format, sizes.chars, newCharReader(file, decoder, sizes.bytes)); + return make(format, sizes.chars, newCharReader(file, decoder, sizes.bytes), options); } /** @@ -263,9 +338,26 @@ public static Reader of(Path file, Charset encoding, Format format) throws Illeg * @throws IOException if an I/O error occurs */ public static Reader of(InputStream stream, Charset encoding, Format format) throws IllegalArgumentException, IOException { + return of(stream, encoding, format, Parsing.STRICT); + } + + /** + * Creates a new instance from a stream. + * + * @param stream a non-null stream + * @param encoding a non-null encoding + * @param format a non-null format + * @param options non-null options + * @return a new CSV reader + * @throws IllegalArgumentException if the format contains an invalid + * combination of options + * @throws IOException if an I/O error occurs + */ + public static Reader of(InputStream stream, Charset encoding, Format format, Parsing options) throws IllegalArgumentException, IOException { Objects.requireNonNull(stream, "stream"); Objects.requireNonNull(encoding, "encoding"); Objects.requireNonNull(format, "format"); + Objects.requireNonNull(options, "options"); if (!format.isValid()) { throw new IllegalArgumentException("format"); @@ -273,7 +365,7 @@ public static Reader of(InputStream stream, Charset encoding, Format format) thr CharsetDecoder decoder = encoding.newDecoder(); BufferSizes sizes = BufferSizes.of(stream, decoder); - return make(format, sizes.chars, new InputStreamReader(stream, decoder)); + return make(format, sizes.chars, new InputStreamReader(stream, decoder), options); } /** @@ -287,23 +379,39 @@ public static Reader of(InputStream stream, Charset encoding, Format format) thr * @throws IOException if an I/O error occurs */ public static Reader of(java.io.Reader charReader, Format format) throws IllegalArgumentException, IOException { + return of(charReader, format, Parsing.STRICT); + } + + /** + * Creates a new instance from a char reader. + * + * @param charReader a non-null char reader + * @param format a non-null format + * @param options non-null options + * @return a new CSV reader + * @throws IllegalArgumentException if the format contains an invalid + * combination of options + * @throws IOException if an I/O error occurs + */ + public static Reader of(java.io.Reader charReader, Format format, Parsing options) throws IllegalArgumentException, IOException { Objects.requireNonNull(charReader, "charReader"); Objects.requireNonNull(format, "format"); + Objects.requireNonNull(options, "options"); if (!format.isValid()) { throw new IllegalArgumentException("format"); } BufferSizes sizes = BufferSizes.EMPTY; - return make(format, sizes.chars, charReader); + return make(format, sizes.chars, charReader, options); } - private static Reader make(Format format, OptionalInt charBufferSize, java.io.Reader charReader) { + private static Reader make(Format format, OptionalInt charBufferSize, java.io.Reader charReader, Parsing options) { int size = BufferSizes.getSize(charBufferSize, BufferSizes.DEFAULT_CHAR_BUFFER_SIZE); return new Reader( - format.getSeparator() == NewLine.WINDOWS ? new ReadAheadInput(charReader, size) : new Input(charReader, size), + ReadAheadInput.isNeeded(format, options) ? new ReadAheadInput(charReader, size) : new Input(charReader, size), format.getQuote(), format.getDelimiter(), - EndOfLineReader.of(format.getSeparator())); + EndOfLineReader.of(format, options)); } private final Input input; @@ -543,6 +651,10 @@ public int read() throws IOException { private static final class ReadAheadInput extends Input { + static boolean isNeeded(Format format, Parsing options) { + return options.isLenientSeparator() || format.getSeparator() == NewLine.WINDOWS; + } + private static final int NULL = -2; private int readAhead; @@ -578,24 +690,51 @@ private interface EndOfLineReader { static final int CR_CODE = NewLine.CR; static final int LF_CODE = NewLine.LF; - static EndOfLineReader of(NewLine newLine) { - switch (newLine) { + static EndOfLineReader of(Format format, Parsing options) { + if (options.isLenientSeparator()) { + return EndOfLineReader::isLenient; + } + switch (format.getSeparator()) { case MACINTOSH: - return (code, input) -> code == CR_CODE; + return EndOfLineReader::isMacintosh; case UNIX: - return (code, input) -> code == LF_CODE; + return EndOfLineReader::isUnix; case WINDOWS: - return (code, input) -> { - if (code == CR_CODE && ((ReadAheadInput) input).peek(LF_CODE)) { - ((ReadAheadInput) input).discardAheadOfTimeChar(); - return true; - } - return false; - }; + return EndOfLineReader::isWindows; default: throw new RuntimeException(); } } + + static boolean isLenient(int code, Input input) throws IOException { + switch (code) { + case LF_CODE: + return true; + case CR_CODE: + if (((ReadAheadInput) input).peek(LF_CODE)) { + ((ReadAheadInput) input).discardAheadOfTimeChar(); + } + return true; + default: + return false; + } + } + + static boolean isMacintosh(int code, Input input) throws IOException { + return code == CR_CODE; + } + + static boolean isUnix(int code, Input input) throws IOException { + return code == LF_CODE; + } + + static boolean isWindows(int code, Input input) throws IOException { + if (code == CR_CODE && ((ReadAheadInput) input).peek(LF_CODE)) { + ((ReadAheadInput) input).discardAheadOfTimeChar(); + return true; + } + return false; + } } private static java.io.Reader newCharReader(Path file, CharsetDecoder decoder, OptionalInt byteBufferSize) throws IOException { diff --git a/src/test/java/_test/QuickReader.java b/src/test/java/_test/QuickReader.java index 3ca35aa..2b215dc 100644 --- a/src/test/java/_test/QuickReader.java +++ b/src/test/java/_test/QuickReader.java @@ -36,9 +36,9 @@ public enum QuickReader { BYTE_ARRAY(StreamType.STREAM) { @Override - public T readValue(QuickReader.Parser parser, Charset encoding, Csv.Format format, String input) throws IOException { + public T readValue(QuickReader.Parser parser, Charset encoding, Csv.Format format, String input, Csv.Parsing options) throws IOException { try (InputStream stream = newInputStream(input, encoding)) { - try (Csv.Reader reader = Csv.Reader.of(stream, encoding, format)) { + try (Csv.Reader reader = Csv.Reader.of(stream, encoding, format, options)) { return parser.accept(reader); } } @@ -46,19 +46,19 @@ public T readValue(QuickReader.Parser parser, Charset encoding, Csv.Forma }, FILE(StreamType.FILE) { @Override - public T readValue(QuickReader.Parser parser, Charset encoding, Csv.Format format, String input) throws IOException { + public T readValue(QuickReader.Parser parser, Charset encoding, Csv.Format format, String input, Csv.Parsing options) throws IOException { Path file = newInputFile(input, encoding); - try (Csv.Reader reader = Csv.Reader.of(file, encoding, format)) { + try (Csv.Reader reader = Csv.Reader.of(file, encoding, format, options)) { return parser.accept(reader); } } }, FILE_STREAM(StreamType.STREAM) { @Override - public T readValue(QuickReader.Parser parser, Charset encoding, Csv.Format format, String input) throws IOException { + public T readValue(QuickReader.Parser parser, Charset encoding, Csv.Format format, String input, Csv.Parsing options) throws IOException { Path file = newInputFile(input, encoding); try (InputStream stream = Files.newInputStream(file)) { - try (Csv.Reader reader = Csv.Reader.of(stream, encoding, format)) { + try (Csv.Reader reader = Csv.Reader.of(stream, encoding, format, options)) { return parser.accept(reader); } } @@ -66,9 +66,9 @@ public T readValue(QuickReader.Parser parser, Charset encoding, Csv.Forma }, CHAR_READER(StreamType.OBJECT) { @Override - public T readValue(QuickReader.Parser parser, Charset encoding, Csv.Format format, String input) throws IOException { + public T readValue(QuickReader.Parser parser, Charset encoding, Csv.Format format, String input, Csv.Parsing options) throws IOException { try (Reader object = newCharReader(input)) { - try (Csv.Reader reader = Csv.Reader.of(object, format)) { + try (Csv.Reader reader = Csv.Reader.of(object, format, options)) { return parser.accept(reader); } } @@ -78,13 +78,13 @@ public T readValue(QuickReader.Parser parser, Charset encoding, Csv.Forma @lombok.Getter private final StreamType type; - abstract public T readValue(Parser parser, Charset encoding, Csv.Format format, String input) throws IOException; + abstract public T readValue(Parser parser, Charset encoding, Csv.Format format, String input, Csv.Parsing options) throws IOException; - public void read(VoidParser parser, Charset encoding, Csv.Format format, String input) throws IOException { + public void read(VoidParser parser, Charset encoding, Csv.Format format, String input, Csv.Parsing options) throws IOException { readValue(stream -> { parser.accept(stream); return null; - }, encoding, format, input); + }, encoding, format, input, options); } @FunctionalInterface diff --git a/src/test/java/_test/Sample.java b/src/test/java/_test/Sample.java index e7ef282..a2423da 100644 --- a/src/test/java/_test/Sample.java +++ b/src/test/java/_test/Sample.java @@ -51,6 +51,10 @@ public class Sample { private boolean withoutEOL; + public Sample withNewLine(Csv.NewLine newLine) { + return withFormat(getFormat().toBuilder().separator(newLine).build()); + } + public static final class Builder { public Builder rowOf(String... fields) { diff --git a/src/test/java/nbbrd/picocsv/CsvFormatTest.java b/src/test/java/nbbrd/picocsv/CsvFormatTest.java index 2894672..ee6c5c4 100644 --- a/src/test/java/nbbrd/picocsv/CsvFormatTest.java +++ b/src/test/java/nbbrd/picocsv/CsvFormatTest.java @@ -47,7 +47,9 @@ public void testEqualsAndHashcode() { assertThat(Csv.Format.DEFAULT) .isEqualTo(Csv.Format.DEFAULT) .hasSameHashCodeAs(Csv.Format.DEFAULT) - .isNotEqualTo(Csv.Format.EXCEL); + .isNotEqualTo(Csv.Format.EXCEL) + .isNotEqualTo(null) + .isNotEqualTo(""); } @Test diff --git a/src/test/java/nbbrd/picocsv/CsvParsingTest.java b/src/test/java/nbbrd/picocsv/CsvParsingTest.java new file mode 100644 index 0000000..8f6c257 --- /dev/null +++ b/src/test/java/nbbrd/picocsv/CsvParsingTest.java @@ -0,0 +1,44 @@ +/* + * Copyright 2019 National Bank of Belgium + * + * Licensed under the EUPL, Version 1.1 or - as soon they will be approved + * by the European Commission - subsequent versions of the EUPL (the "Licence"); + * You may not use this work except in compliance with the Licence. + * You may obtain a copy of the Licence at: + * + * http://ec.europa.eu/idabc/eupl + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the Licence is distributed on an "AS IS" basis, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the Licence for the specific language governing permissions and + * limitations under the Licence. + */ +package nbbrd.picocsv; + +import static org.assertj.core.api.Assertions.assertThat; +import org.junit.Test; + +/** + * + * @author Philippe Charles + */ +public class CsvParsingTest { + + @Test + public void testEqualsAndHashcode() { + assertThat(Csv.Parsing.STRICT) + .isEqualTo(Csv.Parsing.STRICT) + .hasSameHashCodeAs(Csv.Parsing.STRICT) + .isNotEqualTo(Csv.Parsing.LENIENT) + .isNotEqualTo(null) + .isNotEqualTo(""); + } + + @Test + public void testToString() { + assertThat(Csv.Parsing.STRICT.toString()) + .isEqualTo(Csv.Parsing.STRICT.toString()) + .isNotEqualTo(Csv.Parsing.LENIENT.toString()); + } +} diff --git a/src/test/java/nbbrd/picocsv/CsvReaderTest.java b/src/test/java/nbbrd/picocsv/CsvReaderTest.java index 61fabd2..ef43cc4 100644 --- a/src/test/java/nbbrd/picocsv/CsvReaderTest.java +++ b/src/test/java/nbbrd/picocsv/CsvReaderTest.java @@ -58,6 +58,26 @@ public void testPathFactory() { assertThatIllegalArgumentException() .isThrownBy(() -> Csv.Reader.of(QuickReader.newInputFile("", UTF_8), UTF_8, ILLEGAL_FORMAT)) .withMessageContaining("format"); + + assertThatNullPointerException() + .isThrownBy(() -> Csv.Reader.of((Path) null, UTF_8, RFC4180, Csv.Parsing.STRICT)) + .withMessageContaining("file"); + + assertThatNullPointerException() + .isThrownBy(() -> Csv.Reader.of(QuickReader.newInputFile("", UTF_8), null, RFC4180, Csv.Parsing.STRICT)) + .withMessageContaining("encoding"); + + assertThatNullPointerException() + .isThrownBy(() -> Csv.Reader.of(QuickReader.newInputFile("", UTF_8), UTF_8, null, Csv.Parsing.STRICT)) + .withMessageContaining("format"); + + assertThatIllegalArgumentException() + .isThrownBy(() -> Csv.Reader.of(QuickReader.newInputFile("", UTF_8), UTF_8, ILLEGAL_FORMAT, Csv.Parsing.STRICT)) + .withMessageContaining("format"); + + assertThatNullPointerException() + .isThrownBy(() -> Csv.Reader.of(QuickReader.newInputFile("", UTF_8), UTF_8, RFC4180, null)) + .withMessageContaining("options"); } @Test @@ -77,6 +97,26 @@ public void testStreamFactory() { assertThatIllegalArgumentException() .isThrownBy(() -> Csv.Reader.of(QuickReader.newInputStream("", UTF_8), UTF_8, ILLEGAL_FORMAT)) .withMessageContaining("format"); + + assertThatNullPointerException() + .isThrownBy(() -> Csv.Reader.of((InputStream) null, UTF_8, RFC4180, Csv.Parsing.STRICT)) + .withMessageContaining("stream"); + + assertThatNullPointerException() + .isThrownBy(() -> Csv.Reader.of(QuickReader.newInputStream("", UTF_8), null, RFC4180, Csv.Parsing.STRICT)) + .withMessageContaining("encoding"); + + assertThatNullPointerException() + .isThrownBy(() -> Csv.Reader.of(QuickReader.newInputStream("", UTF_8), UTF_8, null, Csv.Parsing.STRICT)) + .withMessageContaining("format"); + + assertThatIllegalArgumentException() + .isThrownBy(() -> Csv.Reader.of(QuickReader.newInputStream("", UTF_8), UTF_8, ILLEGAL_FORMAT, Csv.Parsing.STRICT)) + .withMessageContaining("format"); + + assertThatNullPointerException() + .isThrownBy(() -> Csv.Reader.of(QuickReader.newInputStream("", UTF_8), UTF_8, RFC4180, null)) + .withMessageContaining("options"); } @Test @@ -92,6 +132,22 @@ public void testReaderFactory() { assertThatIllegalArgumentException() .isThrownBy(() -> Csv.Reader.of(QuickReader.newCharReader(""), ILLEGAL_FORMAT)) .withMessageContaining("format"); + + assertThatNullPointerException() + .isThrownBy(() -> Csv.Reader.of((Reader) null, RFC4180, Csv.Parsing.STRICT)) + .withMessageContaining("charReader"); + + assertThatNullPointerException() + .isThrownBy(() -> Csv.Reader.of(QuickReader.newCharReader(""), null, Csv.Parsing.STRICT)) + .withMessageContaining("format"); + + assertThatIllegalArgumentException() + .isThrownBy(() -> Csv.Reader.of(QuickReader.newCharReader(""), ILLEGAL_FORMAT, Csv.Parsing.STRICT)) + .withMessageContaining("format"); + + assertThatNullPointerException() + .isThrownBy(() -> Csv.Reader.of(QuickReader.newCharReader(""), RFC4180, null)) + .withMessageContaining("options"); } @Test @@ -99,7 +155,10 @@ public void testAllSamples() throws IOException { for (QuickReader reader : QuickReader.values()) { for (Charset encoding : Sample.CHARSETS) { for (Sample sample : Sample.SAMPLES) { - assertValid(reader, encoding, sample); + assertValid(reader, encoding, sample, Csv.Parsing.STRICT); + for (Csv.NewLine newLine : Csv.NewLine.values()) { + assertValid(reader, encoding, sample.withNewLine(newLine), Csv.Parsing.LENIENT); + } } } } @@ -113,11 +172,11 @@ public void testSkip() throws IOException { switch (sample.getRows().size()) { case 0: case 1: - assertThat(reader.readValue(this::skipFirst, encoding, sample.getFormat(), sample.getContent())) + assertThat(reader.readValue(this::skipFirst, encoding, sample.getFormat(), sample.getContent(), Csv.Parsing.STRICT)) .isEmpty(); break; default: - assertThat(reader.readValue(this::skipFirst, encoding, sample.getFormat(), sample.getContent())) + assertThat(reader.readValue(this::skipFirst, encoding, sample.getFormat(), sample.getContent(), Csv.Parsing.STRICT)) .element(0) .isEqualTo(sample.getRows().get(1)); break; @@ -140,7 +199,7 @@ public void testReadFieldBeforeLine() throws IOException { for (Charset encoding : Sample.CHARSETS) { for (Sample sample : Sample.SAMPLES) { assertThatIllegalStateException() - .isThrownBy(() -> reader.read(readFieldBeforeLine, encoding, sample.getFormat(), sample.getContent())); + .isThrownBy(() -> reader.read(readFieldBeforeLine, encoding, sample.getFormat(), sample.getContent(), Csv.Parsing.STRICT)); } } } @@ -157,7 +216,7 @@ public void testNonQuotedNonNewLineChar() throws IOException { .build(); for (QuickReader type : QuickReader.values()) { - assertValid(type, UTF_8, invalidButStillOk); + assertValid(type, UTF_8, invalidButStillOk, Csv.Parsing.STRICT); } } @@ -174,7 +233,7 @@ public void testReusableFieldOverflow() throws IOException { .build(); for (QuickReader type : QuickReader.values()) { - assertValid(type, UTF_8, overflow); + assertValid(type, UTF_8, overflow, Csv.Parsing.STRICT); } } @@ -234,11 +293,11 @@ public void testCharSequence() throws IOException { } private static void assertValid(Sample sample) throws IOException { - assertValid(QuickReader.CHAR_READER, UTF_8, sample); + assertValid(QuickReader.CHAR_READER, UTF_8, sample, Csv.Parsing.STRICT); } - private static void assertValid(QuickReader r, Charset encoding, Sample sample) throws IOException { - assertThat(r.readValue(Row::read, encoding, sample.getFormat(), sample.getContent())) + private static void assertValid(QuickReader r, Charset encoding, Sample sample, Csv.Parsing options) throws IOException { + assertThat(r.readValue(Row::read, encoding, sample.getFormat(), sample.getContent(), options)) .describedAs("Reading '%s' with '%s'", sample.getName(), r) .containsExactlyElementsOf(sample.getRows()); }