improvement/record-too-large-exception #90

Merged (3 commits) on Nov 12, 2023
Changes from all commits
149 changes: 149 additions & 0 deletions RecordParser.Test/FileReaderTest.cs
@@ -25,6 +25,155 @@
public int Ranking;
}

[Theory]
// note: the newline is \n on Unix and \r\n on Windows, so the string
// length differs between operating systems and, depending on the values
// passed, the result could differ per OS.
// the current values pass on both.
[InlineData(900_000, true)]
[InlineData(1_100_000, false)]
public void Given_record_is_too_large_for_default_buffer_size_then_exception_should_be_throw(int innerRecords, bool enoughBuffer)
{
// Arrange

// construct a CSV with a header row
// and a *single* data row, where the 4th column contains a large inlined CSV file enclosed in quotes.
// this is an extreme case, but it is valid CSV according to the spec.
var tw = new StringWriter();
tw.WriteLine("A,B,C,D");
tw.Write("1,2,3,\"");

for (int i = 0; i < innerRecords; i++)
{
tw.WriteLine("1,2,3,4");
}
// close the quoted field
tw.WriteLine("\"");

var fileContent = tw.ToString();
var reader = new StringReader(fileContent);

// Act

var options = new VariableLengthReaderRawOptions
{
HasHeader = true,
ContainsQuotedFields = true,
ColumnCount = 4,
Separator = ",",
ParallelismOptions = new()
{
Enabled = true,
MaxDegreeOfParallelism = 2
},
};

var act = () =>
{
var records = reader.ReadRecordsRaw(options, getField =>
{
var record = new
{
A = getField(0),
B = getField(1),
C = getField(2),
D = getField(3)
};
return record;
});

return records.ToList();
};

// Assert

if (enoughBuffer == false)
{
act.Should().Throw<RecordTooLargeException>().WithMessage("Record is too large.");
return;
}

var result = act();
result.Should().HaveCount(1);
var row = result[0];
row.A.Should().Be("1");
row.B.Should().Be("2");
row.C.Should().Be("3");

var start = fileContent.IndexOf('"') + 1;
var end = fileContent.LastIndexOf('"');
var innerCSV = fileContent.AsSpan(start, end - start);

row.D.Should().Be(innerCSV);
}

[Theory(Skip = "At the moment the library does not support a customized buffer size")]
[InlineData(12, 4)]
[InlineData(13, 5)]
[InlineData(14, 7)]
[InlineData(15, 7)]
public void Given_record_is_too_large_for_custom_buffer_size_then_exception_should_be_throw(int bufferSize, int canRead)

GitHub Actions / dotnet test warning on line 115 in RecordParser.Test/FileReaderTest.cs: Theory method 'Given_record_is_too_large_for_custom_buffer_size_then_exception_should_be_throw' on test class 'FileReaderTest' does not use parameter 'bufferSize'. (https://xunit.github.io/xunit.analyzers/rules/xUnit1026)
{
// Arrange

var fileContent = """
A,B,C,D
1,2,3,4
5,6,7,8
9,10,11,12
13,14,15,16
87,88,89,100
89,99,100,101
88,89,90,91
""";

var expected = new[]
{
(1,2,3,4),
(5,6,7,8),
(9,10,11,12),
(13,14,15,16),
(87,88,89,100),
(89,99,100,101),
(88,89,90,91),
};

var reader = new StringReader(fileContent);

var parser = new VariableLengthReaderSequentialBuilder<(int A, int B, int C, int D)>()
.Map(x => x.A)
.Map(x => x.B)
.Map(x => x.C)
.Map(x => x.D)
.Build(",");

// Act

var results = new List<(int A, int B, int C, int D)>();
var act = () =>
{
var records = reader.ReadRecords(parser, new()
{
HasHeader = true,
// BufferSize = bufferSize,
});

foreach (var item in records)
results.Add(item);
};

// Assert

var bufferLargeEnoughToReadAll = canRead == expected.Length;

if (bufferLargeEnoughToReadAll)
act();
else
act.Should().Throw<RecordTooLargeException>().WithMessage("Record is too large.");

results.Should().BeEquivalentTo(expected.Take(canRead));
}

public static string GetFilePath(string fileName) => Path.Combine(Directory.GetCurrentDirectory(), fileName);

public static IEnumerable<object[]> Given_quoted_csv_file_should_read_quoted_properly_theory(string file)
@@ -74,7 +223,7 @@

[Theory]
[MemberData(nameof(Given_quoted_csv_file_should_read_quoted_properly_theory), new object[] { "AllFieldsQuotedCsv.csv" })]
public void Read_csv_file_all_fields_quoted(string fileContent, bool hasHeader, bool parallelProcessing, bool blankLineAtEnd, int repeat)

GitHub Actions / dotnet test warning on line 226 in RecordParser.Test/FileReaderTest.cs: Theory method 'Read_csv_file_all_fields_quoted' on test class 'FileReaderTest' does not use parameter 'blankLineAtEnd'. (https://xunit.github.io/xunit.analyzers/rules/xUnit1026)
{
// Arrange

@@ -124,7 +273,7 @@

[Theory]
[MemberData(nameof(Given_quoted_csv_file_should_read_quoted_properly_theory), new object[] { "QuotedCsv.csv" })]
public void Read_quoted_csv_file(string fileContent, bool hasHeader, bool parallelProcessing, bool blankLineAtEnd, int repeat)

GitHub Actions / dotnet test warning on line 276 in RecordParser.Test/FileReaderTest.cs: Theory method 'Read_quoted_csv_file' on test class 'FileReaderTest' does not use parameter 'blankLineAtEnd'. (https://xunit.github.io/xunit.analyzers/rules/xUnit1026)
{
// Arrange

@@ -214,7 +363,7 @@

[Theory]
[MemberData(nameof(Given_not_quoted_csv_file_should_read_quoted_properly_theory))]
public void Read_not_quoted_csv_file(string fileContent, bool hasHeader, bool parallelProcessing, bool blankLineAtEnd, bool containgQuote, int repeat)

GitHub Actions / dotnet test warning on line 366 in RecordParser.Test/FileReaderTest.cs: Theory method 'Read_not_quoted_csv_file' on test class 'FileReaderTest' does not use parameter 'blankLineAtEnd'. (https://xunit.github.io/xunit.analyzers/rules/xUnit1026)
{
// Arrange

@@ -303,7 +452,7 @@

[Theory]
[MemberData(nameof(Given_fixed_length_file_should_read_quoted_properly_theory))]
public void Read_fixed_length_file(string fileContent, bool parallelProcessing, bool blankLineAtEnd, int repeat)

GitHub Actions / dotnet test warning on line 455 in RecordParser.Test/FileReaderTest.cs: Theory method 'Read_fixed_length_file' on test class 'FileReaderTest' does not use parameter 'blankLineAtEnd'. (https://xunit.github.io/xunit.analyzers/rules/xUnit1026)
{
// Arrange

@@ -388,7 +537,7 @@

[Theory]
[MemberData(nameof(Given_fixed_length_file_should_read_quoted_properly_theory))]
public void Read_plain_text_of_fixed_length_file(string fileContent, bool parallelProcessing, bool blankLineAtEnd, int repeat)

GitHub Actions / dotnet test warning on line 540 in RecordParser.Test/FileReaderTest.cs: Theory method 'Read_plain_text_of_fixed_length_file' on test class 'FileReaderTest' does not use parameter 'parallelProcessing'. (https://xunit.github.io/xunit.analyzers/rules/xUnit1026)
GitHub Actions / dotnet test warning on line 540 in RecordParser.Test/FileReaderTest.cs: Theory method 'Read_plain_text_of_fixed_length_file' on test class 'FileReaderTest' does not use parameter 'blankLineAtEnd'. (https://xunit.github.io/xunit.analyzers/rules/xUnit1026)
GitHub Actions / dotnet test warning on line 540 in RecordParser.Test/FileReaderTest.cs: Theory method 'Read_plain_text_of_fixed_length_file' on test class 'FileReaderTest' does not use parameter 'repeat'. (https://xunit.github.io/xunit.analyzers/rules/xUnit1026)
{
// Arrange

11 changes: 7 additions & 4 deletions RecordParser.Test/SpanExtensions.cs
@@ -22,12 +22,15 @@ public static string ToLower(this ReadOnlySpan<char> value)
}

// FluentAssertions does not support Span yet
public static StringAssertions Should(this Span<char> value) =>
value.ToString().Should();

// FluentAssertions does not support ReadOnlySpan yet
public static StringAssertions Should(this ReadOnlySpan<char> value) =>
value.ToString().Should();

public static AndConstraint<StringAssertions> Be(this StringAssertions value, ReadOnlySpan<char> expected) =>
value.Be(expected.ToString());

public static readonly FuncSpanTIntBool ToUpperInvariant = (Span<char> span, ReadOnlySpan<char> text) =>
(text.ToUpperInvariant(span) is var written && written == text.Length, Math.Max(0, written));
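The new Be overload lets a span (like innerCSV in the new test) be compared without calling ToString() at the assertion site. A small sketch, assuming the extensions above are in scope and xUnit as in the rest of the suite:

using System;
using FluentAssertions;
using Xunit;

public class SpanAssertionSketch
{
    [Fact]
    public void String_can_be_compared_against_a_span()
    {
        ReadOnlySpan<char> expected = "1,2,3".AsSpan();

        // Resolves to the new Be(StringAssertions, ReadOnlySpan<char>) extension,
        // mirroring row.D.Should().Be(innerCSV) in the new test.
        "1,2,3".Should().Be(expected);
    }
}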
16 changes: 16 additions & 0 deletions RecordParser/Extensions/FileReader/RecordTooLargeException.cs
@@ -0,0 +1,16 @@
using System;

namespace RecordParser.Extensions
{
/// <summary>
/// The exception that is thrown when a single record is too large to fit in the read buffer.
/// </summary>
/// <remarks>
/// A possible cause is an incorrectly formatted file.
/// At the moment, the library does not support customizing the buffer size.
/// </remarks>
public class RecordTooLargeException : Exception
{
public RecordTooLargeException(string message) : base(message) { }
}
}
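A minimal sketch (not part of this PR) of how a caller could handle the new exception, reusing the ReadRecordsRaw and VariableLengthReaderRawOptions API exercised in the test above; the using directives and the file name are assumptions for illustration.

using System;
using System.IO;
using System.Linq;
using RecordParser.Extensions; // assumed namespace for ReadRecordsRaw and RecordTooLargeException

class Example
{
    static void Main()
    {
        // "big-export.csv" is a hypothetical file; an unbalanced quote in it could
        // turn the rest of the file into one huge record.
        using var reader = new StreamReader("big-export.csv");

        var options = new VariableLengthReaderRawOptions
        {
            HasHeader = true,
            ContainsQuotedFields = true,
            ColumnCount = 4,
            Separator = ","
        };

        try
        {
            // Parsing happens during enumeration, so the exception surfaces here.
            var rows = reader.ReadRecordsRaw(options, getField => new
            {
                A = getField(0),
                B = getField(1),
                C = getField(2),
                D = getField(3)
            }).ToList();

            Console.WriteLine($"Read {rows.Count} records.");
        }
        catch (RecordTooLargeException ex)
        {
            // A single record did not fit in the internal read buffer.
            Console.WriteLine(ex.Message);
        }
    }
}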
3 changes: 3 additions & 0 deletions RecordParser/Extensions/FileReader/RowReaders/RowBy.cs
@@ -33,6 +33,9 @@ public int FillBuffer()
var len = i - j;
if (initial == false)
{
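// the leftover of the current record already fills the entire buffer,
// so no further read can ever complete it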
if (len == buffer.Length)
throw new RecordTooLargeException("Record is too large.");

Array.Copy(buffer, j, buffer, 0, len);
}
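The check above is the heart of the change: FillBuffer shifts the tail of an incomplete record to the start of the buffer before reading more, and if that tail already occupies the entire buffer, no further read can complete the record. Below is a simplified, self-contained sketch of the same pattern (an illustration only, not the library's actual reader).

using System;
using System.IO;
using RecordParser.Extensions; // assumed namespace for RecordTooLargeException

static class BufferedLineReader
{
    // Reads newline-delimited records with a fixed-size buffer and fails fast
    // when a single record cannot fit in that buffer.
    public static void ReadAll(TextReader reader, Action<string> onRecord, int bufferSize = 16)
    {
        var buffer = new char[bufferSize];
        int used = 0;

        while (true)
        {
            int read = reader.Read(buffer, used, buffer.Length - used);
            if (read == 0)
                break;
            used += read;

            int lastNewLine = Array.LastIndexOf(buffer, '\n', used - 1);
            if (lastNewLine < 0)
            {
                // No complete record yet; if the buffer is already full,
                // another read can never complete the record.
                if (used == buffer.Length)
                    throw new RecordTooLargeException("Record is too large.");
                continue;
            }

            foreach (var line in new string(buffer, 0, lastNewLine).Split('\n'))
                onRecord(line.TrimEnd('\r'));

            // Shift the tail of the incomplete record to the front, as Array.Copy does above.
            int leftover = used - (lastNewLine + 1);
            Array.Copy(buffer, lastNewLine + 1, buffer, 0, leftover);
            used = leftover;
        }

        if (used > 0)
            onRecord(new string(buffer, 0, used).TrimEnd('\r')); // last record without a trailing newline
    }
}

For example, BufferedLineReader.ReadAll(new StringReader("a\n" + new string('b', 20) + "\n"), Console.WriteLine) throws, because the 20-character record exceeds the 16-character buffer.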
