Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion dev/archery/archery/integration/datagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1523,7 +1523,6 @@ def _temp_path():

generate_nested_large_offsets_case()
.skip_category('Go')
.skip_category('Java') # TODO(ARROW-6111)
.skip_category('JS')
.skip_category('Rust'),

Expand Down
5 changes: 5 additions & 0 deletions java/vector/src/main/codegen/data/ArrowTypes.tdd
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@
fields: [],
complex: true
},
{
name: "LargeList",
fields: [],
complex: true
},
{
name: "FixedSizeList",
fields: [{name: "listSize", type: int}],
Expand Down
1 change: 1 addition & 0 deletions java/vector/src/main/codegen/templates/ComplexCopier.java
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ private static void writeValue(FieldReader reader, FieldWriter writer) {
switch (mt) {

case LIST:
case LARGELIST:
case FIXED_SIZE_LIST:
if (reader.isSet()) {
writer.startList();
Expand Down
34 changes: 28 additions & 6 deletions java/vector/src/main/codegen/templates/UnionListWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,38 +23,44 @@
import java.math.BigDecimal;

<@pp.dropOutputFile />
<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionListWriter.java" />
<#list ["List", "LargeList"] as listName>

<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/Union${listName}Writer.java" />

<#include "/@includes/license.ftl" />

package org.apache.arrow.vector.complex.impl;

import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
<#include "/@includes/vv_imports.ftl" />

/*
* This class is generated using freemarker and the ${.template_name} template.
*/

@SuppressWarnings("unused")
public class UnionListWriter extends AbstractFieldWriter {
public class Union${listName}Writer extends AbstractFieldWriter {

protected ListVector vector;
protected ${listName}Vector vector;
protected PromotableWriter writer;
private boolean inStruct = false;
private String structName;
<#if listName == "LargeList">
private static final long OFFSET_WIDTH = 8;
<#else>
private static final int OFFSET_WIDTH = 4;
</#if>

public UnionListWriter(ListVector vector) {
public Union${listName}Writer(${listName}Vector vector) {
this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance());
}

public UnionListWriter(ListVector vector, NullableStructWriterFactory nullableStructWriterFactory) {
public Union${listName}Writer(${listName}Vector vector, NullableStructWriterFactory nullableStructWriterFactory) {
this.vector = vector;
this.writer = new PromotableWriter(vector.getDataVector(), vector, nullableStructWriterFactory);
}

public UnionListWriter(ListVector vector, AbstractFieldWriter parent) {
public Union${listName}Writer(${listName}Vector vector, AbstractFieldWriter parent) {
this(vector);
}

Expand Down Expand Up @@ -92,6 +98,7 @@ public void close() throws Exception {
public void setPosition(int index) {
  // No list-specific bookkeeping is done here; the call is forwarded unchanged to the superclass.
  super.setPosition(index);
}

<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
<#assign fields = minor.fields!type.fields />
<#assign uncappedName = name?uncap_first/>
Expand Down Expand Up @@ -149,6 +156,19 @@ public StructWriter struct(String name) {
return structWriter;
}

<#if listName == "LargeList">
@Override
public void startList() {
  vector.startNewValue(idx());
  // Byte address of offset slot (idx() + 1); computed in long arithmetic because offsets are 8 bytes wide.
  final long offsetSlotAddress = ((long) idx() + 1L) * OFFSET_WIDTH;
  final long childStart = vector.getOffsetBuffer().getLong(offsetSlotAddress);
  // The child writer is positioned with an int, hence the checked narrowing.
  writer.setPosition(checkedCastToInt(childStart));
}

@Override
public void endList() {
  // Record the child writer's current position in offset slot (idx() + 1), then advance to the next list.
  final long offsetSlotAddress = ((long) idx() + 1L) * OFFSET_WIDTH;
  final long childEnd = writer.idx();
  vector.getOffsetBuffer().setLong(offsetSlotAddress, childEnd);
  setPosition(idx() + 1);
}
<#else>
@Override
public void startList() {
vector.startNewValue(idx());
Expand All @@ -160,6 +180,7 @@ public void endList() {
vector.getOffsetBuffer().setInt((idx() + 1) * OFFSET_WIDTH, writer.idx());
setPosition(idx() + 1);
}
</#if>

@Override
public void start() {
Expand Down Expand Up @@ -224,3 +245,4 @@ public void write(${name}Holder holder) {
</#list>
</#list>
}
</#list>
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static io.netty.util.internal.PlatformDependent.getByte;
import static io.netty.util.internal.PlatformDependent.getInt;
import static io.netty.util.internal.PlatformDependent.getLong;
import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;

import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BoundsChecking;
Expand All @@ -37,6 +38,20 @@ public class BitVectorHelper {

private BitVectorHelper() {}

/**
 * Locate the byte of the validity buffer that contains the given bit.
 *
 * @param absoluteBitIndex position of the bit within the validity buffer
 * @return the bit position arithmetically shifted right by three (eight bits per byte)
 */
public static long byteIndex(long absoluteBitIndex) {
  final long byteOffset = absoluteBitIndex >> 3;
  return byteOffset;
}

/**
 * Locate the given bit inside its byte of the validity buffer.
 *
 * @param absoluteBitIndex position of the bit within the validity buffer
 * @return the low three bits of the position, narrowed to an int
 */
public static int bitIndex(long absoluteBitIndex) {
  final long positionInByte = absoluteBitIndex & 7;
  return checkedCastToInt(positionInByte);
}

/**
* Get the index of byte corresponding to bit index in validity buffer.
*/
Expand All @@ -57,11 +72,11 @@ public static int bitIndex(int absoluteBitIndex) {
* @param validityBuffer validity buffer of the vector
* @param index index to be set
*/
public static void setBit(ArrowBuf validityBuffer, int index) {
public static void setBit(ArrowBuf validityBuffer, long index) {
// it can be observed that some logic is duplicate of the logic in setValidityBit.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can replace the old implementation

void setBit(ArrowBuf validityBuffer, int index)

with this one, as now ArrowBuf is based on 64-bit index.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

// this is because JIT cannot always remove the if branch in setValidityBit,
// so we give a dedicated implementation for setting bits.
final int byteIndex = byteIndex(index);
final long byteIndex = byteIndex(index);
final int bitIndex = bitIndex(index);

// the byte is promoted to an int, because according to Java specification,
Expand All @@ -76,7 +91,7 @@ public static void setBit(ArrowBuf validityBuffer, int index) {
/**
* Set the bit at provided index to 1.
*
* @deprecated Please use {@link BitVectorHelper#setBit(ArrowBuf, int)} instead..
* @deprecated Please use {@link BitVectorHelper#setBit(ArrowBuf, long)} instead.
*/
@Deprecated
public static void setValidityBitToOne(ArrowBuf validityBuffer, int index) {
Expand Down
15 changes: 15 additions & 0 deletions java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,15 @@ public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type)
return new TypeLayout(vectors);
}

@Override
public TypeLayout visit(ArrowType.LargeList type) {
  // Two buffer layouts, in order: validityVector() then largeOffsetBuffer().
  final List<BufferLayout> bufferLayouts =
      asList(BufferLayout.validityVector(), BufferLayout.largeOffsetBuffer());
  return new TypeLayout(bufferLayouts);
}

@Override
public TypeLayout visit(FixedSizeList type) {
List<BufferLayout> vectors = asList(
Expand Down Expand Up @@ -295,6 +304,12 @@ public Integer visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) {
return 2;
}

@Override
public Integer visit(ArrowType.LargeList type) {
  // one validity buffer plus one offset buffer
  final int bufferCount = 2;
  return bufferCount;
}

@Override
public Integer visit(FixedSizeList type) {
// validity buffer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

package org.apache.arrow.vector.compare;

import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;

import java.util.List;
import java.util.function.BiFunction;

Expand All @@ -30,6 +32,7 @@
import org.apache.arrow.vector.complex.BaseRepeatedValueVector;
import org.apache.arrow.vector.complex.DenseUnionVector;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.LargeListVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.UnionVector;
Expand Down Expand Up @@ -171,6 +174,14 @@ public Boolean visit(FixedSizeListVector left, Range range) {
return compareFixedSizeListVectors(range);
}

@Override
public Boolean visit(LargeListVector left, Range range) {
  // The range comparison is attempted only when validate(left) succeeds.
  return validate(left) && compareLargeListVectors(range);
}

@Override
public Boolean visit(NonNullableStructVector left, Range range) {
if (!validate(left)) {
Expand Down Expand Up @@ -494,4 +505,46 @@ protected boolean compareFixedSizeListVectors(Range range) {
}
return true;
}

/**
 * Compares the requested range of the left and right {@link LargeListVector}s element by element.
 *
 * <p>At each position the null flags of both vectors must agree. For non-null positions the
 * list lengths, derived from the 64-bit entries of each offset buffer, must match, and the
 * corresponding child ranges must be reported equal by the inner visitor.
 *
 * @param range supplies the left start, the right start and the number of elements to compare
 * @return {@code true} if every compared position matches, {@code false} at the first mismatch
 */
protected boolean compareLargeListVectors(Range range) {
  LargeListVector leftVector = (LargeListVector) left;
  LargeListVector rightVector = (LargeListVector) right;

  RangeEqualsVisitor innerVisitor =
      createInnerVisitor(leftVector.getDataVector(), rightVector.getDataVector(), /*type comparator*/ null);
  Range innerRange = new Range();

  // Loop-invariant, so read once. Kept as a long so that every offset address below is
  // computed in 64-bit arithmetic.
  final long offsetWidth = LargeListVector.OFFSET_WIDTH;

  for (int i = 0; i < range.getLength(); i++) {
    int leftIndex = range.getLeftStart() + i;
    int rightIndex = range.getRightStart() + i;

    boolean isNull = leftVector.isNull(leftIndex);
    if (isNull != rightVector.isNull(rightIndex)) {
      return false;
    }

    if (!isNull) {
      // "index + 1L" widens before the addition; casting the int sum afterwards would let
      // the addition overflow in int arithmetic first.
      final long startIndexLeft = leftVector.getOffsetBuffer().getLong(leftIndex * offsetWidth);
      final long endIndexLeft = leftVector.getOffsetBuffer().getLong((leftIndex + 1L) * offsetWidth);

      final long startIndexRight = rightVector.getOffsetBuffer().getLong(rightIndex * offsetWidth);
      final long endIndexRight = rightVector.getOffsetBuffer().getLong((rightIndex + 1L) * offsetWidth);

      if ((endIndexLeft - startIndexLeft) != (endIndexRight - startIndexRight)) {
        return false;
      }

      innerRange = innerRange // TODO revisit these casts when long indexing is finished
          .setRightStart(checkedCastToInt(startIndexRight))
          .setLeftStart(checkedCastToInt(startIndexLeft))
          .setLength(checkedCastToInt(endIndexLeft - startIndexLeft));
      if (!innerVisitor.rangeEquals(innerRange)) {
        return false;
      }
    }
  }
  return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.complex.DenseUnionVector;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.LargeListVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.UnionVector;
Expand Down Expand Up @@ -93,6 +94,11 @@ public Boolean visit(FixedSizeListVector left, Void value) {
return compareField(left.getField(), right.getField());
}

/**
 * Type comparison for a {@link LargeListVector}: delegates to {@code compareField} with the
 * field of {@code left} and the field of the {@code right} vector. The {@code value}
 * argument is not used.
 */
@Override
public Boolean visit(LargeListVector left, Void value) {
  return compareField(left.getField(), right.getField());
}

@Override
public Boolean visit(NonNullableStructVector left, Void value) {
return compareField(left.getField(), right.getField());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.arrow.vector.NullVector;
import org.apache.arrow.vector.complex.DenseUnionVector;
import org.apache.arrow.vector.complex.FixedSizeListVector;
import org.apache.arrow.vector.complex.LargeListVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.UnionVector;
Expand All @@ -44,6 +45,8 @@ public interface VectorVisitor<OUT, IN> {

OUT visit(FixedSizeListVector left, IN value);

/** Visit method for {@link LargeListVector}. */
OUT visit(LargeListVector left, IN value);

OUT visit(NonNullableStructVector left, IN value);

OUT visit(UnionVector left, IN value);
Expand Down
Loading