From e980820a14c07627d890e76aa75a50af3b55487b Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Thu, 1 Jul 2021 17:34:57 +0800 Subject: [PATCH] Dataset API: Allow empty projection (#24) --- cpp/src/jni/dataset/jni_wrapper.cc | 7 +++---- .../dataset/file/TestFileSystemDataset.java | 21 +++++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/cpp/src/jni/dataset/jni_wrapper.cc b/cpp/src/jni/dataset/jni_wrapper.cc index 87fa14443e154..3f1ef41f688db 100644 --- a/cpp/src/jni/dataset/jni_wrapper.cc +++ b/cpp/src/jni/dataset/jni_wrapper.cc @@ -413,10 +413,9 @@ JNIEXPORT jlong JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_createScann std::shared_ptr scanner_builder = JniGetOrThrow(dataset->NewScan()); JniAssertOkOrThrow(scanner_builder->Pool(pool)); - if (columns != nullptr) { - std::vector column_vector = ToStringVector(env, columns); - JniAssertOkOrThrow(scanner_builder->Project(column_vector)); - } + + std::vector column_vector = ToStringVector(env, columns); + JniAssertOkOrThrow(scanner_builder->Project(column_vector)); JniAssertOkOrThrow(scanner_builder->BatchSize(batch_size)); auto scanner = JniGetOrThrow(scanner_builder->Finish()); diff --git a/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java b/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java index 6a605ee2b9761..91f261ddf0edb 100644 --- a/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java +++ b/java/dataset/src/test/java/org/apache/arrow/dataset/file/TestFileSystemDataset.java @@ -143,6 +143,27 @@ public void testParquetProjectSingleColumn() throws Exception { AutoCloseables.close(datum); } + @Test + public void testEmptyProjector() throws Exception { + ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(), 1, "a", + 2, "b"); + + FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(), NativeMemoryPool.getDefault(), + FileFormat.PARQUET, writeSupport.getOutputURI()); + ScanOptions options = new ScanOptions(new String[]{}, Filter.EMPTY, 100); + Schema schema = inferResultSchemaFromFactory(factory, options); + List datum = collectResultFromFactory(factory, options); + org.apache.avro.Schema expectedSchema = truncateAvroSchema(writeSupport.getAvroSchema(), 0, 1); + + assertSingleTaskProduced(factory, options); + assertEquals(0, schema.getFields().size()); + assertEquals(1, datum.size()); + + AutoCloseables.close(datum); + AutoCloseables.close(factory); + } + + @Test public void testParquetBatchSize() throws Exception { ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(),