diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ArrowColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ArrowColumnVector.java index e99201f6372fe..d53e1fcab0c5a 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ArrowColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ArrowColumnVector.java @@ -54,11 +54,6 @@ public int numNulls() { return accessor.getNullCount(); } - @Override - public boolean anyNullsSet() { - return numNulls() > 0; - } - @Override public void close() { if (childColumns != null) { diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java index fd5caf3bf170b..dc7c1269bedd9 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java @@ -53,12 +53,6 @@ public abstract class ColumnVector implements AutoCloseable { */ public abstract int numNulls(); - /** - * Returns true if any of the nulls indicator are set for this column. This can be used - * as an optimization to prevent setting nulls. - */ - public abstract boolean anyNullsSet(); - /** * Returns whether the value at rowId is NULL. */ diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java index 5f1b9885334b7..1c45b846790b6 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java @@ -110,7 +110,6 @@ public void putNotNull(int rowId) { public void putNull(int rowId) { Platform.putByte(null, nulls + rowId, (byte) 1); ++numNulls; - anyNullsSet = true; } @Override @@ -119,13 +118,12 @@ public void putNulls(int rowId, int count) { for (int i = 0; i < count; ++i, ++offset) { Platform.putByte(null, offset, (byte) 1); } - anyNullsSet = true; numNulls += count; } @Override public void putNotNulls(int rowId, int count) { - if (!anyNullsSet) return; + if (numNulls == 0) return; long offset = nulls + rowId; for (int i = 0; i < count; ++i, ++offset) { Platform.putByte(null, offset, (byte) 0); diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java index f12772ede575d..1d538fe4181b7 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java @@ -107,7 +107,6 @@ public void putNotNull(int rowId) { public void putNull(int rowId) { nulls[rowId] = (byte)1; ++numNulls; - anyNullsSet = true; } @Override @@ -115,13 +114,12 @@ public void putNulls(int rowId, int count) { for (int i = 0; i < count; ++i) { nulls[rowId + i] = (byte)1; } - anyNullsSet = true; numNulls += count; } @Override public void putNotNulls(int rowId, int count) { - if (!anyNullsSet) return; + if (numNulls == 0) return; for (int i = 0; i < count; ++i) { nulls[rowId + i] = (byte)0; } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java index 63cf60818a855..5f6f125976e12 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java @@ -54,12 +54,11 @@ public void reset() { ((WritableColumnVector) c).reset(); } } - numNulls = 0; elementsAppended = 0; - if (anyNullsSet) { + if (numNulls > 0) { putNotNulls(0, capacity); - anyNullsSet = false; } + numNulls = 0; } @Override @@ -104,9 +103,6 @@ private void throwUnsupportedException(int requiredCapacity, Throwable cause) { @Override public int numNulls() { return numNulls; } - @Override - public boolean anyNullsSet() { return anyNullsSet; } - /** * Returns the dictionary Id for rowId. * @@ -640,12 +636,6 @@ public final int appendStruct(boolean isNull) { */ protected int numNulls; - /** - * True if there is at least one NULL byte set. This is an optimization for the writer, to skip - * having to clear NULL bits. - */ - protected boolean anyNullsSet; - /** * True if this column's values are fixed. This means the column values never change, even * across resets. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala index 068a17bf772e1..e460d0721e7bf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala @@ -42,7 +42,6 @@ class ArrowColumnVectorSuite extends SparkFunSuite { val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === BooleanType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -71,7 +70,6 @@ class ArrowColumnVectorSuite extends SparkFunSuite { val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === ByteType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -100,7 +98,6 @@ class ArrowColumnVectorSuite extends SparkFunSuite { val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === ShortType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -129,7 +126,6 @@ class ArrowColumnVectorSuite extends SparkFunSuite { val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === IntegerType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -158,7 +154,6 @@ class ArrowColumnVectorSuite extends SparkFunSuite { val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === LongType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -187,7 +182,6 @@ class ArrowColumnVectorSuite extends SparkFunSuite { val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === FloatType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -216,7 +210,6 @@ class ArrowColumnVectorSuite extends SparkFunSuite { val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === DoubleType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -246,7 +239,6 @@ class ArrowColumnVectorSuite extends SparkFunSuite { val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === StringType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -274,7 +266,6 @@ class ArrowColumnVectorSuite extends SparkFunSuite { val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === BinaryType) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) (0 until 10).foreach { i => @@ -319,7 +310,6 @@ class ArrowColumnVectorSuite extends SparkFunSuite { val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === ArrayType(IntegerType)) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) val array0 = columnVector.getArray(0) @@ -383,7 +373,6 @@ class ArrowColumnVectorSuite extends SparkFunSuite { val columnVector = new ArrowColumnVector(vector) assert(columnVector.dataType === schema) - assert(columnVector.anyNullsSet) assert(columnVector.numNulls === 1) val row0 = columnVector.getStruct(0, 2) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala index c9c6bee513b53..d3ed8276b8f10 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchSuite.scala @@ -65,27 +65,22 @@ class ColumnarBatchSuite extends SparkFunSuite { column => val reference = mutable.ArrayBuffer.empty[Boolean] var idx = 0 - assert(!column.anyNullsSet()) assert(column.numNulls() == 0) column.appendNotNull() reference += false - assert(!column.anyNullsSet()) assert(column.numNulls() == 0) column.appendNotNulls(3) (1 to 3).foreach(_ => reference += false) - assert(!column.anyNullsSet()) assert(column.numNulls() == 0) column.appendNull() reference += true - assert(column.anyNullsSet()) assert(column.numNulls() == 1) column.appendNulls(3) (1 to 3).foreach(_ => reference += true) - assert(column.anyNullsSet()) assert(column.numNulls() == 4) idx = column.elementsAppended @@ -93,13 +88,11 @@ class ColumnarBatchSuite extends SparkFunSuite { column.putNotNull(idx) reference += false idx += 1 - assert(column.anyNullsSet()) assert(column.numNulls() == 4) column.putNull(idx) reference += true idx += 1 - assert(column.anyNullsSet()) assert(column.numNulls() == 5) column.putNulls(idx, 3) @@ -107,7 +100,6 @@ class ColumnarBatchSuite extends SparkFunSuite { reference += true reference += true idx += 3 - assert(column.anyNullsSet()) assert(column.numNulls() == 8) column.putNotNulls(idx, 4) @@ -116,7 +108,6 @@ class ColumnarBatchSuite extends SparkFunSuite { reference += false reference += false idx += 4 - assert(column.anyNullsSet()) assert(column.numNulls() == 8) reference.zipWithIndex.foreach { v =>