Skip to content

Commit

Permalink
Decimal256 java implementation with working integration tests. (apach…
Browse files Browse the repository at this point in the history
…e#8281)

This PR completes round trip between C++ and Java integration tests.
  • Loading branch information
emkornfield committed Oct 17, 2020
1 parent 6279531 commit 34b974a
Show file tree
Hide file tree
Showing 40 changed files with 1,424 additions and 215 deletions.
14 changes: 13 additions & 1 deletion cpp/src/arrow/testing/json_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -839,8 +839,20 @@ Status GetDecimal(const RjObject& json_type, std::shared_ptr<DataType>* type) {
ARROW_ASSIGN_OR_RAISE(const int32_t precision,
GetMemberInt<int32_t>(json_type, "precision"));
ARROW_ASSIGN_OR_RAISE(const int32_t scale, GetMemberInt<int32_t>(json_type, "scale"));
int32_t bit_width = 128;
Result<int32_t> maybe_bit_width = GetMemberInt<int32_t>(json_type, "bitWidth");
if (maybe_bit_width.ok()) {
bit_width = maybe_bit_width.ValueOrDie();
}

*type = decimal(precision, scale);
if (bit_width == 128) {
*type = decimal128(precision, scale);
} else if (bit_width == 256) {
*type = decimal256(precision, scale);
} else {
return Status::Invalid("Only 128 bit and 256 Decimals are supported. Received",
bit_width);
}
return Status::OK();
}

Expand Down
30 changes: 22 additions & 8 deletions dev/archery/archery/integration/datagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ def generate_column(self, size, name=None):
DECIMAL_PRECISION_TO_VALUE = {
key: (1 << (8 * i - 1)) - 1 for i, key in enumerate(
[1, 3, 5, 7, 10, 12, 15, 17, 19, 22, 24, 27, 29, 32, 34, 36,
38, 40, 42, 44, 50, 60, 70],
40, 42, 44, 50, 60, 70],
start=1,
)
}
Expand Down Expand Up @@ -1274,20 +1274,29 @@ def generate_null_trivial_case(batch_sizes):
return _generate_file('null_trivial', fields, batch_sizes)


def generate_decimal_case():
def generate_decimal128_case():
fields = [
DecimalField(name='f{}'.format(i), precision=precision, scale=2,
bit_width=128)
bit_width=128)
for i, precision in enumerate(range(3, 39))
] + [
]

possible_batch_sizes = 7, 10
batch_sizes = [possible_batch_sizes[i % 2] for i in range(len(fields))]
return _generate_file('decimal128', fields, batch_sizes)


def generate_decimal256_case():
fields = [
DecimalField(name='f{}'.format(i), precision=precision, scale=5,
bit_width=256)
bit_width=256)
for i, precision in enumerate(range(37, 70))
]

possible_batch_sizes = 7, 10
batch_sizes = [possible_batch_sizes[i % 2] for i in range(len(fields))]
return _generate_file('decimal', fields, batch_sizes)
return _generate_file('decimal256', fields, batch_sizes)



def generate_datetime_case():
Expand Down Expand Up @@ -1515,10 +1524,15 @@ def _temp_path():
.skip_category('JS') # TODO(ARROW-7900)
.skip_category('Go'), # TODO(ARROW-7901)

generate_decimal_case()
generate_decimal128_case()
.skip_category('Go') # TODO(ARROW-7948): Decimal + Go
.skip_category('Rust'),

generate_decimal256_case()
.skip_category('Go') # TODO(ARROW-7948): Decimal + Go
.skip_category('JS')
.skip_category('Rust'),
.skip_category('Java'),



generate_datetime_case(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ private static ArrowType createDecimalArrowType(LogicalTypes.Decimal logicalType
Preconditions.checkArgument(scale <= precision,
"Invalid decimal scale: %s (greater than precision: %s)", scale, precision);

return new ArrowType.Decimal(precision, scale);
return new ArrowType.Decimal(precision, scale, 128);

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ public static ArrowType getArrowTypeForJdbcField(JdbcFieldInfo fieldInfo, Calend
case Types.DECIMAL:
int precision = fieldInfo.getPrecision();
int scale = fieldInfo.getScale();
return new ArrowType.Decimal(precision, scale);
return new ArrowType.Decimal(precision, scale, 128);
case Types.REAL:
case Types.FLOAT:
return new ArrowType.FloatingPoint(SINGLE);
Expand Down
2 changes: 1 addition & 1 deletion java/vector/src/main/codegen/data/ArrowTypes.tdd
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
},
{
name: "Decimal",
fields: [{name: "precision", type: int}, {name: "scale", type: int}],
fields: [{name: "precision", type: int}, {name: "scale", type: int}, {name: "bitWidth", type: int}],
complex: false
},
{
Expand Down
17 changes: 17 additions & 0 deletions java/vector/src/main/codegen/data/ValueVectorTypes.tdd
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,22 @@
{ class: "IntervalDay", millisecondsOffset: 4, friendlyType: "Duration", fields: [ {name: "days", type:"int"}, {name: "milliseconds", type:"int"}] }
]
},
{
major: "Fixed",
width: 32,
javaType: "ArrowBuf",
boxedType: "ArrowBuf",

minor: [
{
class: "BigDecimal",
maxPrecisionDigits: 76, nDecimalDigits: 4, friendlyType: "BigDecimal",
typeParams: [ {name: "scale", type: "int"}, { name: "precision", type: "int"}],
arrowType: "org.apache.arrow.vector.types.pojo.ArrowType.Decimal",
fields: [{name: "start", type: "long"}, {name: "buffer", type: "ArrowBuf"}]
}
]
},
{
major: "Fixed",
width: 16,
Expand All @@ -129,6 +145,7 @@
}
]
},

{
major: "Fixed",
width: -1,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,20 +76,20 @@ public void write(${name}Holder holder) {
fail("${name}");
}

<#if minor.class == "Decimal">
<#if minor.class?ends_with("Decimal")>
public void write${minor.class}(${friendlyType} value) {
fail("${name}");
}

public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list><#if minor.class == "Decimal">, ArrowType arrowType</#if>) {
public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>, ArrowType arrowType) {
fail("${name}");
}

public void writeBigEndianBytesToDecimal(byte[] value) {
public void writeBigEndianBytesTo${minor.class}(byte[] value) {
fail("${name}");
}

public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType) {
public void writeBigEndianBytesTo${minor.class}(byte[] value, ArrowType arrowType) {
fail("${name}");
}
</#if>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ public void endList() {

<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
<#assign fields = minor.fields!type.fields />
<#if minor.class != "Decimal">
<#if minor.class != "Decimal" && minor.class != "BigDecimal">
@Override
public void write(${name}Holder holder) {
getWriter(MinorType.${name?upper_case}).write(holder);
Expand All @@ -85,7 +85,7 @@ public void write(${name}Holder holder) {
getWriter(MinorType.${name?upper_case}).write${minor.class}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
}

<#else>
<#elseif minor.class == "Decimal">
@Override
public void write(DecimalHolder holder) {
getWriter(MinorType.DECIMAL).write(holder);
Expand All @@ -106,6 +106,28 @@ public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType) {
public void writeBigEndianBytesToDecimal(byte[] value) {
getWriter(MinorType.DECIMAL).writeBigEndianBytesToDecimal(value);
}
<#elseif minor.class == "BigDecimal">
@Override
public void write(BigDecimalHolder holder) {
getWriter(MinorType.BIGDECIMAL).write(holder);
}
public void writeBigDecimal(int start, ArrowBuf buffer, ArrowType arrowType) {
getWriter(MinorType.BIGDECIMAL).writeBigDecimal(start, buffer, arrowType);
}
public void writeBigDecimal(int start, ArrowBuf buffer) {
getWriter(MinorType.BIGDECIMAL).writeBigDecimal(start, buffer);
}
public void writeBigEndianBytesToBigDecimal(byte[] value, ArrowType arrowType) {
getWriter(MinorType.BIGDECIMAL).writeBigEndianBytesToBigDecimal(value, arrowType);
}
public void writeBigEndianBytesToBigDecimal(byte[] value) {
getWriter(MinorType.BIGDECIMAL).writeBigEndianBytesToBigDecimal(value);
}
</#if>
</#list></#list>
Expand Down
5 changes: 2 additions & 3 deletions java/vector/src/main/codegen/templates/ArrowType.java
Original file line number Diff line number Diff line change
Expand Up @@ -327,9 +327,8 @@ public static org.apache.arrow.vector.types.pojo.ArrowType getTypeForField(org.a
</#if>
</#list>
<#if type.name == "Decimal">
int bitWidth = ${nameLower}Type.bitWidth();
if (bitWidth != defaultDecimalBitWidth) {
throw new IllegalArgumentException("Library only supports 128-bit decimal values");
if (bitWidth != defaultDecimalBitWidth && bitWidth != 256) {
throw new IllegalArgumentException("Library only supports 128-bit and 256-bit decimal values");
}
</#if>
return new ${name}(<#list type.fields as field><#if field.valueType??>${field.valueType}.fromFlatbufID(${field.name})<#else>${field.name}</#if><#if field_has_next>, </#if></#list>);
Expand Down
9 changes: 5 additions & 4 deletions java/vector/src/main/codegen/templates/ComplexCopier.java
Original file line number Diff line number Diff line change
Expand Up @@ -117,14 +117,14 @@ private static void writeValue(FieldReader reader, FieldWriter writer) {
<#assign fields = minor.fields!type.fields />
<#assign uncappedName = name?uncap_first/>

<#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
<#if !minor.typeParams?? || minor.class?ends_with("Decimal") >

case ${name?upper_case}:
if (reader.isSet()) {
Nullable${name}Holder ${uncappedName}Holder = new Nullable${name}Holder();
reader.read(${uncappedName}Holder);
if (${uncappedName}Holder.isSet == 1) {
writer.write${name}(<#list fields as field>${uncappedName}Holder.${field.name}<#if field_has_next>, </#if></#list><#if minor.class == "Decimal">, new ArrowType.Decimal(decimalHolder.precision, decimalHolder.scale)</#if>);
writer.write${name}(<#list fields as field>${uncappedName}Holder.${field.name}<#if field_has_next>, </#if></#list><#if minor.class?ends_with("Decimal")>, new ArrowType.Decimal(${uncappedName}Holder.precision, ${uncappedName}Holder.scale, ${name}Holder.WIDTH * 8)</#if>);
}
} else {
writer.writeNull();
Expand All @@ -145,7 +145,7 @@ private static FieldWriter getStructWriterForReader(FieldReader reader, StructWr
case ${name?upper_case}:
return (FieldWriter) writer.<#if name == "Int">integer<#else>${uncappedName}</#if>(name);
</#if>
<#if minor.class == "Decimal">
<#if minor.class?ends_with("Decimal")>
case ${name?upper_case}:
if (reader.getField().getType() instanceof ArrowType.Decimal) {
ArrowType.Decimal type = (ArrowType.Decimal) reader.getField().getType();
Expand All @@ -154,6 +154,7 @@ private static FieldWriter getStructWriterForReader(FieldReader reader, StructWr
return (FieldWriter) writer.${uncappedName}(name);
}
</#if>
</#list></#list>
case STRUCT:
return (FieldWriter) writer.struct(name);
Expand All @@ -171,7 +172,7 @@ private static FieldWriter getListWriterForReader(FieldReader reader, ListWriter
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
<#assign fields = minor.fields!type.fields />
<#assign uncappedName = name?uncap_first/>
<#if !minor.typeParams?? || minor.class?starts_with("Decimal") >
<#if !minor.typeParams?? || minor.class?ends_with("Decimal") >
case ${name?upper_case}:
return (FieldWriter) writer.<#if name == "Int">integer<#else>${uncappedName}</#if>();
</#if>
Expand Down
27 changes: 14 additions & 13 deletions java/vector/src/main/codegen/templates/ComplexWriters.java
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ public void setPosition(int idx) {
<#else>
<#if minor.class != "Decimal">
<#if !minor.class?ends_with("Decimal")>
public void write(${minor.class}Holder h) {
vector.setSafe(idx(), h);
vector.setValueCount(idx()+1);
Expand All @@ -123,53 +123,54 @@ public void write(Nullable${minor.class}Holder h) {
}
</#if>
<#if minor.class == "Decimal">
<#if minor.class?ends_with("Decimal")>
public void write(DecimalHolder h){
public void write(${minor.class}Holder h){
DecimalUtility.checkPrecisionAndScale(h.precision, h.scale, vector.getPrecision(), vector.getScale());
vector.setSafe(idx(), h);
vector.setValueCount(idx() + 1);
}
public void write(NullableDecimalHolder h){
public void write(Nullable${minor.class}Holder h){
if (h.isSet == 1) {
DecimalUtility.checkPrecisionAndScale(h.precision, h.scale, vector.getPrecision(), vector.getScale());
}
vector.setSafe(idx(), h);
vector.setValueCount(idx() + 1);
}
public void writeDecimal(long start, ArrowBuf buffer){
public void write${minor.class}(long start, ArrowBuf buffer){
vector.setSafe(idx(), 1, start, buffer);
vector.setValueCount(idx() + 1);
}
public void writeDecimal(long start, ArrowBuf buffer, ArrowType arrowType){
public void write${minor.class}(long start, ArrowBuf buffer, ArrowType arrowType){
DecimalUtility.checkPrecisionAndScale(((ArrowType.Decimal) arrowType).getPrecision(),
((ArrowType.Decimal) arrowType).getScale(), vector.getPrecision(), vector.getScale());
vector.setSafe(idx(), 1, start, buffer);
vector.setValueCount(idx() + 1);
}
public void writeDecimal(BigDecimal value){
public void write${minor.class}(BigDecimal value){
// vector.setSafe already does precision and scale checking
vector.setSafe(idx(), value);
vector.setValueCount(idx() + 1);
}
public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType){
public void writeBigEndianBytesTo${minor.class}(byte[] value, ArrowType arrowType){
DecimalUtility.checkPrecisionAndScale(((ArrowType.Decimal) arrowType).getPrecision(),
((ArrowType.Decimal) arrowType).getScale(), vector.getPrecision(), vector.getScale());
vector.setBigEndianSafe(idx(), value);
vector.setValueCount(idx() + 1);
}
public void writeBigEndianBytesToDecimal(byte[] value){
public void writeBigEndianBytesTo${minor.class}(byte[] value){
vector.setBigEndianSafe(idx(), value);
vector.setValueCount(idx() + 1);
}
</#if>
public void writeNull() {
vector.setNull(idx());
vector.setValueCount(idx()+1);
Expand All @@ -190,18 +191,18 @@ public void writeNull() {
public interface ${eName}Writer extends BaseWriter {
public void write(${minor.class}Holder h);
<#if minor.class == "Decimal">@Deprecated</#if>
<#if minor.class?ends_with("Decimal")>@Deprecated</#if>
public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>);
<#if minor.class == "Decimal">
<#if minor.class?ends_with("Decimal")>

public void write${minor.class}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>, ArrowType arrowType);

public void write${minor.class}(${friendlyType} value);

public void writeBigEndianBytesToDecimal(byte[] value, ArrowType arrowType);
public void writeBigEndianBytesTo${minor.class}(byte[] value, ArrowType arrowType);

@Deprecated
public void writeBigEndianBytesToDecimal(byte[] value);
public void writeBigEndianBytesTo${minor.class}(byte[] value);
</#if>
}

Expand Down
4 changes: 2 additions & 2 deletions java/vector/src/main/codegen/templates/DenseUnionReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ private FieldReader getReaderForIndex(int index) {
<#list type.minor as minor>
<#assign name = minor.class?cap_first />
<#assign uncappedName = name?uncap_first/>
<#if !minor.typeParams?? || minor.class == "Decimal">
<#if !minor.typeParams?? || minor.class?ends_with("Decimal")>
case ${name?upper_case}:
reader = (FieldReader) get${name}(typeId);
break;
Expand Down Expand Up @@ -165,7 +165,7 @@ public int size() {
<#assign friendlyType = (minor.friendlyType!minor.boxedType!type.boxedType) />
<#assign safeType=friendlyType />
<#if safeType=="byte[]"><#assign safeType="ByteArray" /></#if>
<#if !minor.typeParams?? || minor.class == "Decimal">
<#if !minor.typeParams?? || minor.class?ends_with("Decimal")>

private ${name}ReaderImpl get${name}(byte typeId) {
${name}ReaderImpl reader = (${name}ReaderImpl) readers[typeId];
Expand Down
Loading

0 comments on commit 34b974a

Please sign in to comment.