Skip to content

Commit

Permalink
fix(csharp/src/Drivers/Apache): set the precision and scale correctly…
Browse files Browse the repository at this point in the history
… on Decimal128Type
  • Loading branch information
birschick-bq committed May 13, 2024
1 parent 73b8bda commit 481f190
Showing 1 changed file with 64 additions and 5 deletions.
69 changes: 64 additions & 5 deletions csharp/src/Drivers/Apache/Spark/SparkConnection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ public class SparkConnection : HiveServer2Connection
const string InfoDriverVersion = "1.0.0";
const string InfoVendorName = "Spark";
const string InfoDriverArrowVersion = "1.0.0";

// Fallback precision and scale for DECIMAL columns. GetArrowType passes a Decimal128Type built
// from these values to SqlDecimalTypeParser.ParseOrDefault, which returns it when the column's
// type name cannot be parsed as a SQL DECIMAL definition.
private const int DecimalPrecisionDefault = 10;
private const int DecimalScaleDefault = 0;
internal static TSparkGetDirectResults sparkGetDirectResults = new TSparkGetDirectResults(1000);

internal static readonly Dictionary<string, string> timestampConfig = new Dictionary<string, string>
Expand Down Expand Up @@ -288,6 +289,10 @@ public override Schema GetTableSchema(string? catalog, string? dbSchema, string?
int? columnType = columns[4].I32Val.Values.GetValue(i);
string typeName = columns[5].StringVal.Values.GetString(i);
bool nullable = columns[10].I32Val.Values.GetValue(i) == 1;
// Note: the following two columns do not seem to be set correctly for DECIMAL types.
//int? columnSize = columns[6].I32Val.Values.GetValue(i);
//int? decimalDigits = columns[8].I32Val.Values.GetValue(i);

IArrowType dataType = SparkConnection.GetArrowType((ColumnTypeId)columnType!.Value, typeName);
fields[i] = new Field(columnName, dataType, nullable);
}
Expand Down Expand Up @@ -481,8 +486,9 @@ private static IArrowType GetArrowType(ColumnTypeId columnTypeId, string typeNam
case ColumnTypeId.CHAR_TYPE:
return StringType.Default;
case ColumnTypeId.DECIMAL_TYPE:
// TODO: Parse typeName for precision and scale, because not available in other metadata.
return new Decimal128Type(38, 38);
// Note: parsing the type definition is only viable at the table level. Won't
// work for statement results.
return SqlDecimalTypeParser.ParseOrDefault(typeName, new Decimal128Type(DecimalPrecisionDefault, DecimalScaleDefault));
case ColumnTypeId.ARRAY_TYPE:
case ColumnTypeId.MAP_TYPE:
case ColumnTypeId.STRUCT_TYPE:
Expand Down Expand Up @@ -521,7 +527,6 @@ private StructArray GetDbSchemas(

}


IReadOnlyList<Field> schema = StandardSchemas.DbSchemaSchema;
IReadOnlyList<IArrowArray> dataArrays = schema.Validate(
new List<IArrowArray>
Expand Down Expand Up @@ -688,9 +693,63 @@ private string PatternToRegEx(string? pattern)

return builder.ToString();
}

/// <summary>
/// Provides a parser for SQL DECIMAL type definitions.
/// </summary>
private static class SqlDecimalTypeParser
{
    // Values applied when the type definition omits the precision and/or the scale clause.
    private const int DefaultPrecision = 10;
    private const int DefaultScale = 0;

    // Pattern is based on this definition
    // https://docs.databricks.com/en/sql/language-manual/data-types/decimal-value.html#syntax
    // { DECIMAL | DEC | NUMERIC } [ ( p [ , s ] ) ]
    private static readonly Regex s_expression = new(
        @"^\s*(?<typeName>((DECIMAL)|(DEC)|(NUMERIC)))(\s*\(\s*((?<precision>\d+)(\s*\,\s*(?<scale>\d+))?)\s*\))?\s*$",
        RegexOptions.IgnoreCase | RegexOptions.Compiled | RegexOptions.CultureInvariant);

    /// <summary>
    /// Parses the input string for a valid SQL DECIMAL type definition and returns a new <see cref="Decimal128Type"/> or returns the <c>defaultValue</c>, if invalid.
    /// </summary>
    /// <param name="input">The SQL type definition string to parse.</param>
    /// <param name="defaultValue">If input string is an invalid SQL DECIMAL type definition, this value is returned instead.</param>
    /// <returns>If input string is a valid SQL DECIMAL type definition, it returns a new <see cref="Decimal128Type"/>; otherwise <c>defaultValue</c>.</returns>
    public static Decimal128Type ParseOrDefault(string input, Decimal128Type defaultValue)
    {
        return TryParse(input, out Decimal128Type? candidate) ? candidate! : defaultValue;
    }

    /// <summary>
    /// Tries to parse the input string for a valid SQL DECIMAL type definition.
    /// </summary>
    /// <remarks>
    /// This method does not throw for malformed input: a <c>null</c> string, a string that does not
    /// match the DECIMAL syntax, or a precision/scale that does not fit in an <see cref="int"/>
    /// all result in <c>false</c>.
    /// </remarks>
    /// <param name="input">The SQL type definition string to parse.</param>
    /// <param name="value">If successful, a new <see cref="Decimal128Type"/> with the precision and scale set; otherwise <c>null</c>.</param>
    /// <returns>True if it can successfully parse the type definition input string; otherwise false.</returns>
    private static bool TryParse(string input, out Decimal128Type? value)
    {
        value = null;

        // Regex.Match throws on null; treat it like any other unparsable definition.
        if (input is null)
        {
            return false;
        }

        Match match = s_expression.Match(input);
        if (!match.Success)
        {
            return false;
        }

        GroupCollection groups = match.Groups;
        if (!TryReadNumber(groups["precision"], DefaultPrecision, out int precision)
            || !TryReadNumber(groups["scale"], DefaultScale, out int scale))
        {
            return false;
        }

        value = new Decimal128Type(precision, scale);
        return true;
    }

    /// <summary>
    /// Converts a captured digit group to an integer, or supplies the fallback when the group did not participate in the match.
    /// </summary>
    /// <param name="group">The regular expression group holding the digits.</param>
    /// <param name="fallback">The value to use when the group was not captured.</param>
    /// <param name="number">The parsed number, or the fallback.</param>
    /// <returns>False only when the group was captured but its text is not a valid <see cref="int"/>.</returns>
    private static bool TryReadNumber(Group group, int fallback, out int number)
    {
        if (!group.Success)
        {
            number = fallback;
            return true;
        }

        // The pattern's \d+ is unbounded and also matches non-ASCII Unicode digits, so the capture
        // may overflow or be rejected by the integer parser. int.TryParse reports both cases as
        // false instead of throwing. NumberStyles.None with the invariant culture accepts plain
        // decimal digits only.
        return int.TryParse(
            group.Value,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out number);
    }
}
}

public struct TableInfoPair
internal struct TableInfoPair
{
public string Type { get; set; }

Expand Down

0 comments on commit 481f190

Please sign in to comment.