Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for nested serialization of arrays #64

Merged
merged 3 commits into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions CSVFile.nuspec
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<package >
<metadata>
<id>CSVFile</id>
<version>3.1.2</version>
<version>3.1.3</version>
<title>CSVFile</title>
<authors>Ted Spence</authors>
<owners>Ted Spence</owners>
Expand All @@ -15,10 +15,10 @@
<releaseNotes>
July 18, 2023

* Fix issue with inconsistent handling of embedded newlines in the streaming version of the reader
* Add serialization options for arrays
</releaseNotes>
<readme>docs/README.md</readme>
<copyright>Copyright 2006 - 2023</copyright>
<copyright>Copyright 2006 - 2024</copyright>
<tags>fast csv parser serialization deserialization streaming async</tags>
<repository type="git" url="https://github.com/tspence/csharp-csv-reader" />
<dependencies>
Expand Down
65 changes: 62 additions & 3 deletions src/CSV.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* Home page: https://github.com/tspence/csharp-csv-reader
*/
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Text;
Expand All @@ -25,7 +26,7 @@
/// <summary>
/// Root class that contains static functions for straightforward CSV parsing
/// </summary>
public static class CSV

Check warning on line 29 in src/CSV.cs

View workflow job for this annotation

GitHub Actions / build-ubuntu

Rename class 'CSV' to match pascal case naming rules, consider using 'Csv'.
{
/// <summary>
/// Use this to determine what version of DotNet was used to build this library
Expand Down Expand Up @@ -205,7 +206,7 @@
/// <param name="list">The array of objects to serialize</param>
/// <param name="settings">The CSV settings to use when exporting this array (Default: CSV)</param>
/// <returns>The completed CSV string representing one line per element in list</returns>
public static string Serialize<T>(IEnumerable<T> list, CSVSettings settings = null) where T : class, new()

Check warning on line 209 in src/CSV.cs

View workflow job for this annotation

GitHub Actions / build-ubuntu

All 'Serialize' method overloads should be adjacent.
{
if (settings == null)
{
Expand Down Expand Up @@ -339,7 +340,7 @@
/// <param name="riskyChars"></param>
/// <param name="forceQualifierTypes"></param>
/// <returns></returns>
internal static string ItemsToCsv(IEnumerable<object> items, CSVSettings settings, char[] riskyChars, Dictionary<Type, int> forceQualifierTypes)
internal static string ItemsToCsv(IEnumerable items, CSVSettings settings, char[] riskyChars, Dictionary<Type, int> forceQualifierTypes)
{
var sb = new StringBuilder();
foreach (var item in items)
Expand All @@ -355,12 +356,67 @@
continue;
}

// Is this a date time?
// Special cases for other types of serialization
string s;
var itemType = item.GetType();
var interfaces = itemType.GetInterfaces();
bool isEnumerable = false;
if (itemType != typeof(string))
{
foreach (var itemInterface in interfaces)
{
if (itemInterface == typeof(IEnumerable))
{
isEnumerable = true;
}
}
}

if (item is DateTime)
{
s = ((DateTime)item).ToString(settings.DateTimeFormat);
}
else if (isEnumerable)
{
IEnumerable enumerable = item as IEnumerable;
s = string.Empty;
switch (settings.NestedArrayBehavior)
{
case ArrayOptions.ToString:
s = item.ToString();
break;
case ArrayOptions.CountItems:
// from https://stackoverflow.com/questions/3546051/how-to-invoke-system-linq-enumerable-count-on-ienumerablet-using-reflection
if (enumerable != null)
{
int enumerableCount = 0;
var iter = enumerable.GetEnumerator();
using (iter as IDisposable)
{
while (iter.MoveNext())
{
enumerableCount++;
}
}

s = enumerableCount.ToString();
}

break;
case ArrayOptions.TreatAsNull:
if (settings.AllowNull)
{
s = settings.NullToken;
}
break;
case ArrayOptions.RecursiveSerialization:
if (enumerable != null)
{
s = ItemsToCsv(enumerable, settings, riskyChars, forceQualifierTypes);
}
break;
}
}
else
{
s = item.ToString();
Expand Down Expand Up @@ -399,7 +455,10 @@
}

// Subtract the trailing delimiter so we don't inadvertently add an empty column at the end
sb.Length -= 1;
if (sb.Length > 0)
{
sb.Length -= 1;
}
return sb.ToString();
}

Expand Down
33 changes: 32 additions & 1 deletion src/CSVSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,36 @@

namespace CSVFile
{
/// <summary>
/// Defines the behavior of CSV serialization when a nested collection is encountered, meaning any
/// non-string value that implements <see cref="System.Collections.IEnumerable"/> such as an array or a list
/// </summary>
public enum ArrayOptions
{
/// <summary>
/// Use the built-in string conversion of the collection object, which renders its
/// type name rather than its contents (for example `System.String[]`)
/// </summary>
ToString,

/// <summary>
/// Treat the collection as a null value: the null token is written when nulls are allowed,
/// otherwise the field is left blank
/// </summary>
TreatAsNull,

/// <summary>
/// Render the number of items in the collection
/// </summary>
CountItems,

/// <summary>
/// Serialize the items of the collection recursively using the same settings
/// </summary>
RecursiveSerialization,
}

/// <summary>
/// Settings to configure how a CSV file is parsed
/// </summary>
public class CSVSettings

Check warning on line 41 in src/CSVSettings.cs

View workflow job for this annotation

GitHub Actions / build-ubuntu

Rename class 'CSVSettings' to match pascal case naming rules, consider using 'CsvSettings'.
{
/// <summary>
/// The character used to delimit individual fields in the CSV.
Expand Down Expand Up @@ -133,11 +159,16 @@
/// </summary>
public string DateTimeFormat { get; set; } = "o";

/// <summary>
/// The behavior to use when serializing a column whose value is an array or other collection
/// (Default: <see cref="ArrayOptions.TreatAsNull"/>)
/// </summary>
public ArrayOptions NestedArrayBehavior { get; set; } = ArrayOptions.TreatAsNull;

Check warning on line 165 in src/CSVSettings.cs

View workflow job for this annotation

GitHub Actions / build-ubuntu

Make this field 'private' and encapsulate it in a 'public' property.

/// <summary>
/// Standard comma-separated value (CSV) file settings: a shared instance constructed
/// with every setting left at its default value
/// </summary>
public static readonly CSVSettings CSV = new CSVSettings();

/// <summary>
/// Standard comma-separated value (CSV) file settings that permit rendering of NULL values
/// </summary>
Expand Down
61 changes: 61 additions & 0 deletions tests/SerializationTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,16 @@ public class TestClassTwo
public EnumTestType? ThirdColumn;
}

/// <summary>
/// Test fixture exposing one property per kind of collection (array, list, enumerable)
/// so that nested array serialization can be exercised against each of them
/// </summary>
public class TestClassThree
{
public string Name { get; set; }
public string[] StringArray { get; set; }
public List<int> IntList { get; set; }
public IEnumerable<bool> BoolEnumerable { get; set; }
public List<Guid> GuidList { get; set; }
// Left unassigned by TestArraySerialization, which expects it to be written as the null token
public List<Guid> NullableList { get; set; }
}

[Test]
public void TestObjectSerialization()
{
Expand Down Expand Up @@ -125,6 +135,57 @@ public void TestNullSerialization()
}
}

/// <summary>
/// Verifies every <see cref="ArrayOptions"/> mode for columns that hold arrays or other collections.
/// Nested collections have no natural representation in a CSV file, so the serializer offers four behaviors:
/// * ToString renders the collection's type name, e.g. "System.String[]"
/// * CountItems renders the number of elements in the collection
/// * TreatAsNull renders the collection as a null value
/// * RecursiveSerialization renders the elements as a nested, delimited field
/// </summary>
[Test]
public void TestArraySerialization()
{
    var newLine = Environment.NewLine;

    // Every mode is expected to produce the same header row; only the data row differs
    var expectedHeader = "Name,StringArray,IntList,BoolEnumerable,GuidList,NullableList" + newLine;

    // NullableList is deliberately left unassigned so it stays null
    var record = new TestClassThree
    {
        Name = "Test",
        StringArray = new[] { "a", "b", "c" },
        IntList = new List<int> { 1, 2, 3 },
        BoolEnumerable = new[] { true, false, true, false },
        GuidList = new List<Guid>(),
    };
    var list = new List<TestClassThree> { record };

    var options = new CSVSettings
    {
        HeaderRowIncluded = true,
        NestedArrayBehavior = ArrayOptions.ToString,
        NullToken = "NULL",
        AllowNull = true,
    };

    // ToString: the default behavior in CSVFile 3.1.2 and earlier - only type names are written
    var toStringCsv = CSV.Serialize(list, options);
    var expectedToString = expectedHeader
        + "Test,System.String[],System.Collections.Generic.List`1[System.Int32],System.Boolean[],System.Collections.Generic.List`1[System.Guid],NULL"
        + newLine;
    Assert.AreEqual(expectedToString, toStringCsv);

    // CountItems: each collection is replaced by its element count
    options.NestedArrayBehavior = ArrayOptions.CountItems;
    var countItemsCsv = CSV.Serialize(list, options);
    var expectedCounts = expectedHeader + "Test,3,3,4,0,NULL" + newLine;
    Assert.AreEqual(expectedCounts, countItemsCsv);

    // TreatAsNull: every collection column is written as the null token
    options.NestedArrayBehavior = ArrayOptions.TreatAsNull;
    var ignoreArraysCsv = CSV.Serialize(list, options);
    var expectedNulls = expectedHeader + "Test,NULL,NULL,NULL,NULL,NULL" + newLine;
    Assert.AreEqual(expectedNulls, ignoreArraysCsv);

    // RecursiveSerialization: the elements themselves are written inside a single qualified field
    options.NestedArrayBehavior = ArrayOptions.RecursiveSerialization;
    var recursiveCsv = CSV.Serialize(list, options);
    var expectedRecursive = expectedHeader
        + "Test,\"a,b,c\",\"1,2,3\",\"True,False,True,False\",,NULL"
        + newLine;
    Assert.AreEqual(expectedRecursive, recursiveCsv);
}

[Test]
public void TestCaseInsensitiveDeserializer()
{
Expand Down
Loading