improvement/record-too-large-exception #90

Merged (3 commits) on Nov 12, 2023
Changes from all commits
149 changes: 149 additions & 0 deletions RecordParser.Test/FileReaderTest.cs
@@ -25,6 +25,155 @@
public int Ranking;
}

[Theory]
// note: the newline is \n on Unix and \r\n on Windows, so the string
// length differs between operating systems and, depending on the values
// passed, the result could differ per OS.
// the current values pass on both.
[InlineData(900_000, true)]
[InlineData(1_100_000, false)]
public void Given_record_is_too_large_for_default_buffer_size_then_exception_should_be_throw(int innerRecords, bool enoughBuffer)
{
// Arrange

// construct a CSV with a header row
// and a *single* data row, where the 4th column contains a large inlined CSV file enclosed in quotes.
// this is an extreme case, but it is valid CSV according to the spec.
var tw = new StringWriter();
tw.WriteLine("A,B,C,D");
tw.Write("1,2,3,\"");

for (int i = 0; i < innerRecords; i++)
{
tw.WriteLine("1,2,3,4");
}
// close the quoted field
tw.WriteLine("\"");

var fileContent = tw.ToString();
var reader = new StringReader(fileContent);

// Act

var options = new VariableLengthReaderRawOptions
{
HasHeader = true,
ContainsQuotedFields = true,
ColumnCount = 4,
Separator = ",",
ParallelismOptions = new()
{
Enabled = true,
MaxDegreeOfParallelism = 2
},
};

var act = () =>
{
var records = reader.ReadRecordsRaw(options, getField =>
{
var record = new
{
A = getField(0),
B = getField(1),
C = getField(2),
D = getField(3)
};
return record;
});

return records.ToList();
};

// Assert

if (enoughBuffer == false)
{
act.Should().Throw<RecordTooLargeException>().WithMessage("Record is too large.");
return;
}

var result = act();
result.Should().HaveCount(1);
var row = result[0];
row.A.Should().Be("1");
row.B.Should().Be("2");
row.C.Should().Be("3");

var start = fileContent.IndexOf('"') + 1;
var end = fileContent.LastIndexOf('"');
var innerCSV = fileContent.AsSpan(start, end - start);

row.D.Should().Be(innerCSV);
}

[Theory(Skip = "At the moment the library does not support a customized buffer size")]
[InlineData(12, 4)]
[InlineData(13, 5)]
[InlineData(14, 7)]
[InlineData(15, 7)]
public void Given_record_is_too_large_for_custom_buffer_size_then_exception_should_be_throw(int bufferSize, int canRead)

GitHub Actions / dotnet test warning on line 115 in RecordParser.Test/FileReaderTest.cs: Theory method 'Given_record_is_too_large_for_custom_buffer_size_then_exception_should_be_throw' on test class 'FileReaderTest' does not use parameter 'bufferSize'. (https://xunit.github.io/xunit.analyzers/rules/xUnit1026)
{
// Arrange

var fileContent = """
A,B,C,D
1,2,3,4
5,6,7,8
9,10,11,12
13,14,15,16
87,88,89,100
89,99,100,101
88,89,90,91
""";

var expected = new[]
{
(1,2,3,4),
(5,6,7,8),
(9,10,11,12),
(13,14,15,16),
(87,88,89,100),
(89,99,100,101),
(88,89,90,91),
};

var reader = new StringReader(fileContent);

var parser = new VariableLengthReaderSequentialBuilder<(int A, int B, int C, int D)>()
.Map(x => x.A)
.Map(x => x.B)
.Map(x => x.C)
.Map(x => x.D)
.Build(",");

// Act

var results = new List<(int A, int B, int C, int D)>();
var act = () =>
{
var records = reader.ReadRecords(parser, new()
{
HasHeader = true,
// BufferSize = bufferSize,
});

foreach (var item in records)
results.Add(item);
};

// Assert

var bufferLargeEnoughToReadAll = canRead == expected.Length;

if (bufferLargeEnoughToReadAll)
act();
else
act.Should().Throw<RecordTooLargeException>().WithMessage("Record is too large.");

results.Should().BeEquivalentTo(expected.Take(canRead));
}

public static string GetFilePath(string fileName) => Path.Combine(Directory.GetCurrentDirectory(), fileName);

public static IEnumerable<object[]> Given_quoted_csv_file_should_read_quoted_properly_theory(string file)
@@ -74,7 +223,7 @@

[Theory]
[MemberData(nameof(Given_quoted_csv_file_should_read_quoted_properly_theory), new object[] { "AllFieldsQuotedCsv.csv" })]
public void Read_csv_file_all_fields_quoted(string fileContent, bool hasHeader, bool parallelProcessing, bool blankLineAtEnd, int repeat)

GitHub Actions / dotnet test warning on line 226 in RecordParser.Test/FileReaderTest.cs: Theory method 'Read_csv_file_all_fields_quoted' on test class 'FileReaderTest' does not use parameter 'blankLineAtEnd'. (https://xunit.github.io/xunit.analyzers/rules/xUnit1026)
{
// Arrange

@@ -124,7 +273,7 @@

[Theory]
[MemberData(nameof(Given_quoted_csv_file_should_read_quoted_properly_theory), new object[] { "QuotedCsv.csv" })]
public void Read_quoted_csv_file(string fileContent, bool hasHeader, bool parallelProcessing, bool blankLineAtEnd, int repeat)

GitHub Actions / dotnet test warning on line 276 in RecordParser.Test/FileReaderTest.cs: Theory method 'Read_quoted_csv_file' on test class 'FileReaderTest' does not use parameter 'blankLineAtEnd'. (https://xunit.github.io/xunit.analyzers/rules/xUnit1026)
{
// Arrange

@@ -214,7 +363,7 @@

[Theory]
[MemberData(nameof(Given_not_quoted_csv_file_should_read_quoted_properly_theory))]
public void Read_not_quoted_csv_file(string fileContent, bool hasHeader, bool parallelProcessing, bool blankLineAtEnd, bool containgQuote, int repeat)

GitHub Actions / dotnet test warning on line 366 in RecordParser.Test/FileReaderTest.cs: Theory method 'Read_not_quoted_csv_file' on test class 'FileReaderTest' does not use parameter 'blankLineAtEnd'. (https://xunit.github.io/xunit.analyzers/rules/xUnit1026)
{
// Arrange

@@ -303,7 +452,7 @@

[Theory]
[MemberData(nameof(Given_fixed_length_file_should_read_quoted_properly_theory))]
public void Read_fixed_length_file(string fileContent, bool parallelProcessing, bool blankLineAtEnd, int repeat)

GitHub Actions / dotnet test warning on line 455 in RecordParser.Test/FileReaderTest.cs: Theory method 'Read_fixed_length_file' on test class 'FileReaderTest' does not use parameter 'blankLineAtEnd'. (https://xunit.github.io/xunit.analyzers/rules/xUnit1026)
{
// Arrange

@@ -388,7 +537,7 @@

[Theory]
[MemberData(nameof(Given_fixed_length_file_should_read_quoted_properly_theory))]
public void Read_plain_text_of_fixed_length_file(string fileContent, bool parallelProcessing, bool blankLineAtEnd, int repeat)

GitHub Actions / dotnet test warning on line 540 in RecordParser.Test/FileReaderTest.cs: Theory method 'Read_plain_text_of_fixed_length_file' on test class 'FileReaderTest' does not use parameter 'parallelProcessing'. (https://xunit.github.io/xunit.analyzers/rules/xUnit1026)
GitHub Actions / dotnet test warning on line 540 in RecordParser.Test/FileReaderTest.cs: Theory method 'Read_plain_text_of_fixed_length_file' on test class 'FileReaderTest' does not use parameter 'blankLineAtEnd'. (https://xunit.github.io/xunit.analyzers/rules/xUnit1026)
GitHub Actions / dotnet test warning on line 540 in RecordParser.Test/FileReaderTest.cs: Theory method 'Read_plain_text_of_fixed_length_file' on test class 'FileReaderTest' does not use parameter 'repeat'. (https://xunit.github.io/xunit.analyzers/rules/xUnit1026)
{
// Arrange

11 changes: 7 additions & 4 deletions RecordParser.Test/SpanExtensions.cs
@@ -22,12 +22,15 @@ public static string ToLower(this ReadOnlySpan<char> value)
}

// FluentAssertions does not support Span yet
public static StringAssertions Should(this Span<char> value) =>
value.ToString().Should();

// FluentAssertions does not support ReadOnlySpan yet
public static StringAssertions Should(this ReadOnlySpan<char> value) =>
value.ToString().Should();

public static AndConstraint<StringAssertions> Be(this StringAssertions value, ReadOnlySpan<char> expected) =>
value.Be(expected.ToString());

public static readonly FuncSpanTIntBool ToUpperInvariant = (Span<char> span, ReadOnlySpan<char> text) =>
(text.ToUpperInvariant(span) is var written && written == text.Length, Math.Max(0, written));
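The new Be overload lets a span (like innerCSV in the new test) be compared without calling ToString() at the assertion site. A small sketch, assuming the extensions above are in scope and xUnit as in the rest of the suite:

using System;
using FluentAssertions;
using Xunit;

public class SpanAssertionSketch
{
    [Fact]
    public void String_can_be_compared_against_a_span()
    {
        ReadOnlySpan<char> expected = "1,2,3".AsSpan();

        // Resolves to the new Be(StringAssertions, ReadOnlySpan<char>) extension,
        // mirroring row.D.Should().Be(innerCSV) in the new test.
        "1,2,3".Should().Be(expected);
    }
}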
16 changes: 16 additions & 0 deletions RecordParser/Extensions/FileReader/RecordTooLargeException.cs
@@ -0,0 +1,16 @@
using System;

namespace RecordParser.Extensions
{
/// <summary>
/// The exception that is thrown when a single record is too large to fit in the read buffer.
/// </summary>
/// <remarks>
/// A possible cause is an incorrectly formatted file.
/// At the moment, the library does not support customizing the buffer size.
/// </remarks>
public class RecordTooLargeException : Exception
{
public RecordTooLargeException(string message) : base(message) { }
}
}
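A minimal sketch (not part of this PR) of how a caller could handle the new exception, reusing the ReadRecordsRaw and VariableLengthReaderRawOptions API exercised in the test above; the using directives and the file name are assumptions for illustration.

using System;
using System.IO;
using System.Linq;
using RecordParser.Extensions; // assumed namespace for ReadRecordsRaw and RecordTooLargeException

class Example
{
    static void Main()
    {
        // "big-export.csv" is a hypothetical file; an unbalanced quote in it could
        // turn the rest of the file into one huge record.
        using var reader = new StreamReader("big-export.csv");

        var options = new VariableLengthReaderRawOptions
        {
            HasHeader = true,
            ContainsQuotedFields = true,
            ColumnCount = 4,
            Separator = ","
        };

        try
        {
            // Parsing happens during enumeration, so the exception surfaces here.
            var rows = reader.ReadRecordsRaw(options, getField => new
            {
                A = getField(0),
                B = getField(1),
                C = getField(2),
                D = getField(3)
            }).ToList();

            Console.WriteLine($"Read {rows.Count} records.");
        }
        catch (RecordTooLargeException ex)
        {
            // A single record did not fit in the internal read buffer.
            Console.WriteLine(ex.Message);
        }
    }
}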
3 changes: 3 additions & 0 deletions RecordParser/Extensions/FileReader/RowReaders/RowBy.cs
@@ -33,6 +33,9 @@ public int FillBuffer()
var len = i - j;
if (initial == false)
{
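// the leftover of the current record already fills the entire buffer,
// so no further read can ever complete it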
if (len == buffer.Length)
throw new RecordTooLargeException("Record is too large.");

Array.Copy(buffer, j, buffer, 0, len);
}
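The check above is the heart of the change: FillBuffer shifts the tail of an incomplete record to the start of the buffer before reading more, and if that tail already occupies the entire buffer, no further read can complete the record. Below is a simplified, self-contained sketch of the same pattern (an illustration only, not the library's actual reader).

using System;
using System.IO;
using RecordParser.Extensions; // assumed namespace for RecordTooLargeException

static class BufferedLineReader
{
    // Reads newline-delimited records with a fixed-size buffer and fails fast
    // when a single record cannot fit in that buffer.
    public static void ReadAll(TextReader reader, Action<string> onRecord, int bufferSize = 16)
    {
        var buffer = new char[bufferSize];
        int used = 0;

        while (true)
        {
            int read = reader.Read(buffer, used, buffer.Length - used);
            if (read == 0)
                break;
            used += read;

            int lastNewLine = Array.LastIndexOf(buffer, '\n', used - 1);
            if (lastNewLine < 0)
            {
                // No complete record yet; if the buffer is already full,
                // another read can never complete the record.
                if (used == buffer.Length)
                    throw new RecordTooLargeException("Record is too large.");
                continue;
            }

            foreach (var line in new string(buffer, 0, lastNewLine).Split('\n'))
                onRecord(line.TrimEnd('\r'));

            // Shift the tail of the incomplete record to the front, as Array.Copy does above.
            int leftover = used - (lastNewLine + 1);
            Array.Copy(buffer, lastNewLine + 1, buffer, 0, leftover);
            used = leftover;
        }

        if (used > 0)
            onRecord(new string(buffer, 0, used).TrimEnd('\r')); // last record without a trailing newline
    }
}

For example, BufferedLineReader.ReadAll(new StringReader("a\n" + new string('b', 20) + "\n"), Console.WriteLine) throws, because the 20-character record exceeds the 16-character buffer.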
