Skip to content

Commit

Permalink
Dataset API: Allow empty projection (#24)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhztheplayer committed Mar 15, 2022
1 parent 0a2fa74 commit e980820
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
7 changes: 3 additions & 4 deletions cpp/src/jni/dataset/jni_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -413,10 +413,9 @@ JNIEXPORT jlong JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_createScann
std::shared_ptr<arrow::dataset::ScannerBuilder> scanner_builder =
JniGetOrThrow(dataset->NewScan());
JniAssertOkOrThrow(scanner_builder->Pool(pool));
if (columns != nullptr) {
std::vector<std::string> column_vector = ToStringVector(env, columns);
JniAssertOkOrThrow(scanner_builder->Project(column_vector));
}

std::vector<std::string> column_vector = ToStringVector(env, columns);
JniAssertOkOrThrow(scanner_builder->Project(column_vector));
JniAssertOkOrThrow(scanner_builder->BatchSize(batch_size));

auto scanner = JniGetOrThrow(scanner_builder->Finish());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,27 @@ public void testParquetProjectSingleColumn() throws Exception {
AutoCloseables.close(datum);
}

/**
 * Verifies that scanning with an empty projection (a zero-length column array) is accepted:
 * the inferred result schema must contain no fields, and the scan must still yield exactly
 * one record batch from a single scan task.
 */
@Test
public void testEmptyProjector() throws Exception {
  // Write a small temporary Parquet file to scan; the trailing arguments are the record
  // values handed to the write helper (presumably two id/name rows -- confirm against
  // ParquetWriteSupport).
  ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(), 1, "a",
      2, "b");

  FileSystemDatasetFactory factory = new FileSystemDatasetFactory(rootAllocator(), NativeMemoryPool.getDefault(),
      FileFormat.PARQUET, writeSupport.getOutputURI());
  // An empty (non-null) column array requests a projection with zero columns.
  ScanOptions options = new ScanOptions(new String[]{}, Filter.EMPTY, 100);
  Schema schema = inferResultSchemaFromFactory(factory, options);
  List<ArrowRecordBatch> datum = collectResultFromFactory(factory, options);

  assertSingleTaskProduced(factory, options);
  // No columns were projected, so the result schema is empty ...
  assertEquals(0, schema.getFields().size());
  // ... but a batch is still produced for the scanned file.
  assertEquals(1, datum.size());

  AutoCloseables.close(datum);
  AutoCloseables.close(factory);
}


@Test
public void testParquetBatchSize() throws Exception {
ParquetWriteSupport writeSupport = ParquetWriteSupport.writeTempFile(AVRO_SCHEMA_USER, TMP.newFolder(),
Expand Down

0 comments on commit e980820

Please sign in to comment.