Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V2.7.1 release #77

Merged
merged 3 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions src/Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
<ManagePackageVersionsCentrally>true</ManagePackageVersionsCentrally>
</PropertyGroup>
<ItemGroup>
<PackageVersion Include="Newtonsoft.Json" Version="13.0.3" />
<PackageVersion Include="Apache.Arrow" Version="12.0.0" />
<PackageVersion Include="Parquet.Net" Version="4.11.0" />
<PackageVersion Include="Parquet.Net" Version="4.12.0" />
<PackageVersion Include="Microsoft.CSharp" Version="4.7.0" />
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.3.2" />
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.6.0" />
<PackageVersion Include="RichardSzalay.MockHttp" Version="6.0.0" />
<PackageVersion Include="xunit" Version="2.4.2" />
<PackageVersion Include="xunit.runner.visualstudio" Version="2.4.5" />
<PackageVersion Include="coverlet.collector" Version="3.1.2" />
<PackageVersion Include="coverlet.collector" Version="6.0.0" />
<PackageVersion Include="System.Data.DataSetExtensions" Version="4.5.0" />
</ItemGroup>
</Project>
6 changes: 4 additions & 2 deletions src/ParquetViewer.Engine/ParquetEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ public partial class ParquetEngine : IDisposable

public long RecordCount => _recordCount ??= _parquetFiles.Sum(pf => pf.Metadata?.NumRows ?? 0);

public int NumberOfPartitions => _parquetFiles.Count;

private ParquetReader DefaultReader => _parquetFiles.FirstOrDefault() ?? throw new Exception("No parquet readers available");

public List<string> Fields => DefaultReader.Schema.Fields.Select(f => f.Name).ToList() ?? new();
Expand All @@ -23,7 +25,7 @@ public partial class ParquetEngine : IDisposable
public ParquetSchema Schema => DefaultReader.Schema ?? new();

private ParquetSchemaElement? _parquetSchemaTree;
private ParquetSchemaElement ParquetSchemaTree => _parquetSchemaTree ??= BuildParquetSchemaTree();
public ParquetSchemaElement ParquetSchemaTree => _parquetSchemaTree ??= BuildParquetSchemaTree();

public string OpenFileOrFolderPath { get; }

Expand All @@ -46,7 +48,7 @@ private ParquetSchemaElement BuildParquetSchemaTree()
return thriftSchemaTree;
}

private ParquetSchemaElement ReadSchemaTree(ref List<SchemaElement>.Enumerator schemaElements)
private static ParquetSchemaElement ReadSchemaTree(ref List<SchemaElement>.Enumerator schemaElements)
{
if (!schemaElements.MoveNext())
throw new Exception("Invalid parquet schema");
Expand Down
2 changes: 1 addition & 1 deletion src/ParquetViewer.Engine/ParquetSchemaElement.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

namespace ParquetViewer.Engine
{
internal class ParquetSchemaElement
public class ParquetSchemaElement
{
public string Path => SchemaElement.Name;
public SchemaElement SchemaElement { get; set; }
Expand Down
Binary file not shown.
5 changes: 5 additions & 0 deletions src/ParquetViewer.Tests/ParquetViewer.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" />
<PackageReference Include="RichardSzalay.MockHttp" />
<PackageReference Include="xunit" />
<PackageReference Include="xunit.runner.visualstudio">
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
Expand All @@ -33,6 +34,7 @@

<ItemGroup>
<ProjectReference Include="..\ParquetViewer.Engine\ParquetViewer.Engine.csproj" />
<ProjectReference Include="..\ParquetViewer\ParquetViewer.csproj" />
</ItemGroup>

<ItemGroup>
Expand Down Expand Up @@ -63,6 +65,9 @@
<None Update="Data\MAP_TYPE_TEST2.parquet">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Data\NULLABLE_GUID_TEST1.parquet">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Data\PARTITIONED_PARQUET_FILE_TEST1\bldgtype=B\bd8c129da60e412db4b21800b9e0b983.parquet">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
Expand Down
91 changes: 82 additions & 9 deletions src/ParquetViewer.Tests/SanityTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
using ParquetViewer.Analytics;
using ParquetViewer.Engine.Exceptions;
using RichardSzalay.MockHttp;
using System.Globalization;
using System.Net.Http.Json;
using System.Text.RegularExpressions;

namespace ParquetViewer.Tests
{
Expand All @@ -7,7 +12,7 @@ public class SanityTests
[Fact]
public async Task DECIMALS_AND_BOOLS_TEST()
{
var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/DECIMALS_AND_BOOLS_TEST1.parquet", default);
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/DECIMALS_AND_BOOLS_TEST1.parquet", default);

Assert.Equal(30, parquetEngine.RecordCount);
Assert.Equal(337, parquetEngine.Fields.Count);
Expand All @@ -23,7 +28,7 @@ public async Task DECIMALS_AND_BOOLS_TEST()
[Fact]
public async Task DATETIME_TEST1_TEST()
{
var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/DATETIME_TEST1.parquet", default);
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/DATETIME_TEST1.parquet", default);

Assert.Equal(10, parquetEngine.RecordCount);
Assert.Equal(3, parquetEngine.Fields.Count);
Expand All @@ -37,7 +42,7 @@ public async Task DATETIME_TEST1_TEST()
[Fact]
public async Task DATETIME_TEST2_TEST()
{
var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/DATETIME_TEST2.parquet", default);
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/DATETIME_TEST2.parquet", default);

Assert.Equal(1, parquetEngine.RecordCount);
Assert.Equal(11, parquetEngine.Fields.Count);
Expand All @@ -59,7 +64,7 @@ public async Task DATETIME_TEST2_TEST()
[Fact]
public async Task RANDOM_TEST_FILE1_TEST()
{
var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/RANDOM_TEST_FILE1.parquet", default);
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/RANDOM_TEST_FILE1.parquet", default);

Assert.Equal(5, parquetEngine.RecordCount);
Assert.Equal(42, parquetEngine.Fields.Count);
Expand All @@ -77,7 +82,7 @@ public async Task RANDOM_TEST_FILE1_TEST()
[Fact]
public async Task SAME_COLUMN_NAME_DIFFERENT_CASING_TEST()
{
var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/SAME_COLUMN_NAME_DIFFERENT_CASING1.parquet", default);
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/SAME_COLUMN_NAME_DIFFERENT_CASING1.parquet", default);

Assert.Equal(14610, parquetEngine.RecordCount);
Assert.Equal(12, parquetEngine.Fields.Count);
Expand All @@ -96,7 +101,7 @@ public async Task MULTIPLE_SCHEMAS_DETECTED_TEST()
[Fact]
public async Task PARTITIONED_PARQUET_FILE_TEST()
{
var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/PARTITIONED_PARQUET_FILE_TEST1", default);
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/PARTITIONED_PARQUET_FILE_TEST1", default);

Assert.Equal(2000, parquetEngine.RecordCount);
Assert.Equal(9, parquetEngine.Fields.Count);
Expand All @@ -121,7 +126,7 @@ public async Task PARTITIONED_PARQUET_FILE_TEST()
[Fact]
public async Task COLUMN_ENDING_IN_PERIOD_TEST1()
{
var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/COLUMN_ENDING_IN_PERIOD_TEST1.parquet", default);
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/COLUMN_ENDING_IN_PERIOD_TEST1.parquet", default);

Assert.Equal(1, parquetEngine.RecordCount);
Assert.Equal(11, parquetEngine.Fields.Count);
Expand All @@ -135,7 +140,7 @@ public async Task COLUMN_ENDING_IN_PERIOD_TEST1()
[Fact]
public async Task LIST_TYPE_TEST()
{
var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/LIST_TYPE_TEST1.parquet", default);
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/LIST_TYPE_TEST1.parquet", default);

Assert.Equal(3, parquetEngine.RecordCount);
Assert.Equal(2, parquetEngine.Fields.Count);
Expand All @@ -157,7 +162,7 @@ public async Task LIST_TYPE_TEST()
[Fact]
public async Task MAP_TYPE_TEST1()
{
var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/MAP_TYPE_TEST1.parquet", default);
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/MAP_TYPE_TEST1.parquet", default);

Assert.Equal(2, parquetEngine.RecordCount);
Assert.Equal(2, parquetEngine.Fields.Count);
Expand All @@ -172,5 +177,73 @@ public async Task MAP_TYPE_TEST1()
Assert.Equal("value2", ((MapValue)dataTable.Rows[1][0]).Key);
Assert.Equal("else", ((MapValue)dataTable.Rows[1][0]).Value);
}

//Verifies that recording a TestAmplitudeEvent posts the expected JSON payload.
//The HTTP handler is swapped for a mock, so the request is answered by the lambda below.
[Fact]
public async Task AMPLITUDE_EVENT_TEST()
{
    //Base64 string handed to the event as its api key; the expected payload below contains ""dummy"" as the api_key
    const string dummyApiKeyBase64 = "ZHVtbXk=";
    var testEvent = new TestAmplitudeEvent(dummyApiKeyBase64)
    {
        IgnoredProperty = "xxx",
        RegularProperty = "yyy"
    };

    //Booleans are lower-cased with the invariant culture: JSON literals must not depend on the machine's culture
    string expectedRequestJson = @$"
{{
""api_key"": ""dummy"",
""events"": [{{
""device_id"": ""{AppSettings.AnalyticsDeviceId}"",
""event_type"": ""{TestAmplitudeEvent.EVENT_TYPE}"",
""user_properties"": {{
""rememberLastRowCount"": {AppSettings.RememberLastRowCount.ToString().ToLowerInvariant()},
""lastRowCount"": {AppSettings.LastRowCount},
""alwaysSelectAllFields"": {AppSettings.AlwaysSelectAllFields.ToString().ToLowerInvariant()},
""autoSizeColumnsMode"": ""{AppSettings.AutoSizeColumnsMode}"",
""dateTimeDisplayFormat"": ""{AppSettings.DateTimeDisplayFormat}"",
""systemMemory"": {(int)(GC.GetGCMemoryInfo().TotalAvailableMemoryBytes / 1048576.0 /*magic number*/)},
""processorCount"": {Environment.ProcessorCount}
}},
""event_properties"": {{
""regularProperty"": ""yyy""
}},
""session_id"": {testEvent.SessionId},
""language"": ""{CultureInfo.CurrentUICulture.Name}"",
""os_name"": ""{Environment.OSVersion.Platform}"",
""os_version"": ""{Environment.OSVersion.VersionString}"",
""app_version"": ""{AboutBox.AssemblyVersion}""
}}]
}}";

    //Whitespace is stripped from both sides so formatting differences don't affect the comparison
    string normalizedExpectedJson = Regex.Replace(expectedRequestJson, "\\s", string.Empty);

    //Filled in by the mock handler so a mismatch can be reported with the actual payload
    string? normalizedActualJson = null;

    //mock the http request
    var mockHttpHandler = new MockHttpMessageHandler();
    _ = mockHttpHandler.Expect(HttpMethod.Post, "*").Respond(async (request) =>
    {
        //Verify the request we're sending is what we expect it to be
        string requestJsonBody = await (request.Content?.ReadAsStringAsync() ?? Task.FromResult(string.Empty));
        normalizedActualJson = Regex.Replace(requestJsonBody, "\\s", string.Empty);

        return normalizedActualJson.Equals(normalizedExpectedJson, StringComparison.Ordinal)
            ? new HttpResponseMessage(System.Net.HttpStatusCode.OK)
            : new HttpResponseMessage(System.Net.HttpStatusCode.BadRequest);
    });
    testEvent.SwapHttpClientHandler(mockHttpHandler);

    bool wasSuccess = await testEvent.Record();

    //Compare the payloads first: on failure this shows what was actually sent (or null if nothing was sent)
    Assert.Equal(normalizedExpectedJson, normalizedActualJson);
    Assert.True(wasSuccess, "The event json we would have sent to Amplitude didn't match the expected value");
}

//Opens the NULLABLE_GUID_TEST1 sample file and spot-checks its size and three cell values of the first row.
[Fact]
public async Task NULLABLE_GUID_TEST1()
{
    const string testFilePath = "Data/NULLABLE_GUID_TEST1.parquet";
    using var engine = await ParquetEngine.OpenFileOrFolderAsync(testFilePath, default);

    //The file is expected to hold a single record spread over 33 fields
    Assert.Equal(1, engine.RecordCount);
    Assert.Equal(33, engine.Fields.Count);

    //Read every field for all rows, then inspect the first (and only) row
    var table = await engine.ReadRowsAsync(engine.Fields, 0, int.MaxValue, default);
    var firstRow = table.Rows[0];

    Assert.Equal(false, firstRow[22]);
    Assert.Equal(new Guid("0cf9cbfd-d320-45d7-b29f-9c2de1baa979"), firstRow[1]);
    Assert.Equal(new DateTime(2019, 1, 1), firstRow[4]);
}
}
}
26 changes: 26 additions & 0 deletions src/ParquetViewer.Tests/TestAmplitudeEvent.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
using ParquetViewer.Analytics;
using System.Text.Json.Serialization;

namespace ParquetViewer.Tests
{
/// <summary>
/// An <see cref="AmplitudeEvent"/> subclass used by the unit tests. It lets a test supply
/// its own api key and replace the HTTP message handler with a mock.
/// </summary>
public class TestAmplitudeEvent : AmplitudeEvent
{
    /// <summary>Event type name passed to the base constructor for this test event.</summary>
    public const string EVENT_TYPE = "unit.test.event";

    /// <summary>
    /// Marked with <see cref="JsonIgnoreAttribute"/>.
    /// NOTE(review): presumably exists to check that ignored properties are left out of the serialized event - confirm.
    /// </summary>
    [JsonIgnore]
    public string? IgnoredProperty { get; set; }

    /// <summary>A plain property carrying no serialization attributes.</summary>
    public string? RegularProperty { get; set; }

    /// <summary>
    /// Creates the test event and stores <paramref name="dummyApiKey"/> in AMPLITUDE_API_KEY.
    /// </summary>
    /// <param name="dummyApiKey">
    /// The api key to use instead of the real one.
    /// NOTE(review): the existing test passes a Base64 string here - confirm the expected encoding against the base class.
    /// </param>
    public TestAmplitudeEvent(string dummyApiKey) : base(EVENT_TYPE)
        => AMPLITUDE_API_KEY = dummyApiKey;

    /// <summary>
    /// Assigns <paramref name="mockHandler"/> to HttpMessageHandler and sets BypassConsentRequirement to true.
    /// NOTE(review): both members are not declared in this class, presumably inherited from <see cref="AmplitudeEvent"/> - confirm.
    /// </summary>
    /// <param name="mockHandler">The handler that should answer this event's HTTP requests.</param>
    public void SwapHttpClientHandler(HttpMessageHandler mockHandler)
    {
        HttpMessageHandler = mockHandler;
        BypassConsentRequirement = true;
    }
}
}
3 changes: 1 addition & 2 deletions src/ParquetViewer.Tests/Usings.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
global using Xunit;
global using ParquetViewer.Engine;
global using System.Diagnostics;
global using Xunit;
2 changes: 1 addition & 1 deletion src/ParquetViewer/AboutBox.Designer.cs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion src/ParquetViewer/AboutBox.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

namespace ParquetViewer
{
partial class AboutBox : Form
public partial class AboutBox : Form
{
public AboutBox()
{
Expand Down
Loading