From 55cd6f5de318a9c53cb8413d34925c9b55ba98e2 Mon Sep 17 00:00:00 2001
From: XiaoYun Zhang
Date: Thu, 20 Apr 2023 15:11:04 -0700
Subject: [PATCH 1/3] fix datetime null error

---
Reviewer notes (commentary area; skipped when the patch is applied):
  * The DateTime getter now checks DataReader.IsDBNull(columnIndex) first and
    yields default(DateTime) for a NULL cell instead of calling GetDateTime
    on it.
  * The new test inserts one NULL row and one '2018-01-01 00:00:00' row into
    SQLite and asserts DateTime.MinValue and 2018-01-01 respectively.
  * Indentation inside the hunks was reconstructed from a flattened copy of
    this patch; confirm it against the target tree before applying.

 .../Database/DatabaseLoaderCursor.cs          |  2 +-
 .../Microsoft.ML.Tests/DatabaseLoaderTests.cs | 54 +++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/src/Microsoft.ML.Data/DataLoadSave/Database/DatabaseLoaderCursor.cs b/src/Microsoft.ML.Data/DataLoadSave/Database/DatabaseLoaderCursor.cs
index ed092be157..c074d0d1f3 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Database/DatabaseLoaderCursor.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Database/DatabaseLoaderCursor.cs
@@ -307,7 +307,7 @@ private ValueGetter<byte> CreateByteGetterDelegate(ColInfo colInfo)
             private ValueGetter<DateTime> CreateDateTimeGetterDelegate(ColInfo colInfo)
             {
                 int columnIndex = GetColumnIndex(colInfo);
-                return (ref DateTime value) => value = DataReader.GetDateTime(columnIndex);
+                return (ref DateTime value) => value = DataReader.IsDBNull(columnIndex) ? default : DataReader.GetDateTime(columnIndex);
             }

             private ValueGetter<double> CreateDoubleGetterDelegate(ColInfo colInfo)
diff --git a/test/Microsoft.ML.Tests/DatabaseLoaderTests.cs b/test/Microsoft.ML.Tests/DatabaseLoaderTests.cs
index 21b6d96d19..555144dfee 100644
--- a/test/Microsoft.ML.Tests/DatabaseLoaderTests.cs
+++ b/test/Microsoft.ML.Tests/DatabaseLoaderTests.cs
@@ -7,7 +7,9 @@
 using System.Data.SqlClient;
 using System.Data.SQLite;
 using System.IO;
+using System.Linq;
 using System.Runtime.InteropServices;
+using FluentAssertions;
 using Microsoft.ML.Data;
 using Microsoft.ML.RunTests;
 using Microsoft.ML.TestFramework;
@@ -222,6 +224,38 @@ public void IrisSdcaMaximumEntropy()
             }).PredictedLabel);
         }

+        [Fact]
+        public void TestLoadDatetimeColumnWithNullValue()
+        {
+            var connectionString = "DataSource=Dummy;Mode=Memory;Version=3;Timeout=120;Cache=Shared";
+            using (var connection = new SQLiteConnection(connectionString))
+            {
+                connection.Open();
+                using (var command = new SQLiteCommand(connection))
+                {
+                    command.CommandText = """
+                        BEGIN;
+                        CREATE TABLE IF NOT EXISTS Datetime (datetime Datetime NULL);
+                        INSERT INTO Datetime VALUES (NULL);
+                        INSERT INTO Datetime VALUES ('2018-01-01 00:00:00');
+                        COMMIT;
+                        """;
+                    command.ExecuteNonQuery();
+                }
+            }
+            var mlContext = new MLContext(seed: 1);
+            var loader = mlContext.Data.CreateDatabaseLoader(new DatabaseLoader.Column("datetime", DbType.DateTime, 0));
+            var source = new DatabaseSource(SQLiteFactory.Instance, connectionString, "SELECT datetime FROM Datetime");
+            var data = loader.Load(source);
+            var datetimes = data.GetColumn<DateTime>("datetime").ToArray();
+            datetimes.Count().Should().Be(2);
+
+            // Convert null value to DateTime.MinValue, aka 0001-01-01 00:00:00
+            // This is the default behavior of TextLoader as well.
+            datetimes[0].Should().Be(DateTime.MinValue);
+            datetimes[1].Should().Be(new DateTime(2018, 1, 1, 0, 0, 0));
+        }
+
         /// <summary>
         /// Non-Windows builds do not support SqlClientFactory/MSSQL databases. Hence, an equivalent
         /// SQLite database is used on Linux and MacOS builds.
@@ -255,6 +289,26 @@ private string GetSQLiteConnectionString(string databaseName)
             return $@"Data Source={databaseFile};Version=3;Read Only=True;Timeout=120;";
         }

+        private string CreateDummyDatabaseWithMissingDatetimeColumn()
+        {
+            var connectionString = "DataSource=:memory:;Version=3;Timeout=120;";
+            using (var connection = new SQLiteConnection(connectionString))
+            {
+                connection.Open();
+                using (var command = new SQLiteCommand(connection))
+                {
+                    command.CommandText = """
+                        CREATE TABLE Datetime (datetime Datetime NULL);
+                        INSERT INTO Datetime VALUES (NULL);
+                        INSERT INTO Datetime VALUES ('2018-01-01 00:00:00');
+                        """;
+                    var l = command.ExecuteNonQuery();
+                }
+            }
+
+            return connectionString;
+        }
+
         public class IrisData
         {
             public int Label;

From afcff06326e34e7d7bff6259a602d2473708ce6c Mon Sep 17 00:00:00 2001
From: XiaoYun Zhang
Date: Thu, 20 Apr 2023 15:50:45 -0700
Subject: [PATCH 2/3] clean up

---
Reviewer notes (commentary area; skipped when the patch is applied):
  * Removes CreateDummyDatabaseWithMissingDatetimeColumn, which patch 1/3
    added; the test added in 1/3 builds its own database inline and does not
    call this helper.

 .../Microsoft.ML.Tests/DatabaseLoaderTests.cs | 20 -------------------
 1 file changed, 20 deletions(-)

diff --git a/test/Microsoft.ML.Tests/DatabaseLoaderTests.cs b/test/Microsoft.ML.Tests/DatabaseLoaderTests.cs
index 555144dfee..27b1143908 100644
--- a/test/Microsoft.ML.Tests/DatabaseLoaderTests.cs
+++ b/test/Microsoft.ML.Tests/DatabaseLoaderTests.cs
@@ -289,26 +289,6 @@ private string GetSQLiteConnectionString(string databaseName)
             return $@"Data Source={databaseFile};Version=3;Read Only=True;Timeout=120;";
         }

-        private string CreateDummyDatabaseWithMissingDatetimeColumn()
-        {
-            var connectionString = "DataSource=:memory:;Version=3;Timeout=120;";
-            using (var connection = new SQLiteConnection(connectionString))
-            {
-                connection.Open();
-                using (var command = new SQLiteCommand(connection))
-                {
-                    command.CommandText = """
-                        CREATE TABLE Datetime (datetime Datetime NULL);
-                        INSERT INTO Datetime VALUES (NULL);
-                        INSERT INTO Datetime VALUES ('2018-01-01 00:00:00');
-                        """;
-                    var l = command.ExecuteNonQuery();
-                }
-            }
-
-            return connectionString;
-        }
-
         public class IrisData
         {
             public int Label;

From 81752bdd742ae42d9a042b4f67762c145971fc93 Mon Sep 17 00:00:00 2001
From: XiaoYun Zhang
Date: Thu, 20 Apr 2023 16:28:03 -0700
Subject: [PATCH 3/3] enable test only on x64/x86 machines

---
Reviewer notes (commentary area; skipped when the patch is applied):
  * Replaces [Fact] on TestLoadDatetimeColumnWithNullValue with
    X86X64FactAttribute; per the attribute's own message, the native
    SQLite.interop library only supports x86/x64.

 test/Microsoft.ML.Tests/DatabaseLoaderTests.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/Microsoft.ML.Tests/DatabaseLoaderTests.cs b/test/Microsoft.ML.Tests/DatabaseLoaderTests.cs
index 27b1143908..4f7ebef980 100644
--- a/test/Microsoft.ML.Tests/DatabaseLoaderTests.cs
+++ b/test/Microsoft.ML.Tests/DatabaseLoaderTests.cs
@@ -224,7 +224,7 @@ public void IrisSdcaMaximumEntropy()
             }).PredictedLabel);
         }

-        [Fact]
+        [X86X64FactAttribute("The SQLite un-managed code, SQLite.interop, only supports x86/x64 architectures.")]
         public void TestLoadDatetimeColumnWithNullValue()
         {
             var connectionString = "DataSource=Dummy;Mode=Memory;Version=3;Timeout=120;Cache=Shared";