diff --git a/java/dataset/src/main/cpp/jni_wrapper.cc b/java/dataset/src/main/cpp/jni_wrapper.cc index b3b5fe18c79..3392d54f6c7 100644 --- a/java/dataset/src/main/cpp/jni_wrapper.cc +++ b/java/dataset/src/main/cpp/jni_wrapper.cc @@ -399,11 +399,11 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_closeDataset /* * Class: org_apache_arrow_dataset_jni_JniWrapper * Method: createScanner - * Signature: (J[Ljava/lang/String;JJ)J + * Signature: (J[Ljava/lang/String;JLjava/lang/String;J)J */ JNIEXPORT jlong JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_createScanner( JNIEnv* env, jobject, jlong dataset_id, jobjectArray columns, jlong batch_size, - jlong memory_pool_id) { + jstring filter, jlong memory_pool_id) { JNI_METHOD_START arrow::MemoryPool* pool = reinterpret_cast(memory_pool_id); if (pool == nullptr) { @@ -418,6 +418,11 @@ JNIEXPORT jlong JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_createScann std::vector column_vector = ToStringVector(env, columns); JniAssertOkOrThrow(scanner_builder->Project(column_vector)); } + if (filter != nullptr) { + arrow::compute::Expression filter_expr = JniGetOrThrow( + arrow::compute::Expression::FromString(JStringToCString(env, filter))); + JniAssertOkOrThrow(scanner_builder->Filter(filter_expr)); + } JniAssertOkOrThrow(scanner_builder->BatchSize(batch_size)); auto scanner = JniGetOrThrow(scanner_builder->Finish()); diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java index 1a9d4188c16..4ab2b17aaa1 100644 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java +++ b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java @@ -74,7 +74,7 @@ private JniWrapper() { * @param memoryPool identifier of memory pool used in the native scanner. * @return the native pointer of the arrow::dataset::Scanner instance. */ - public native long createScanner(long datasetId, String[] columns, long batchSize, long memoryPool); + public native long createScanner(long datasetId, String[] columns, long batchSize, String filter, long memoryPool); /** * Get a serialized schema from native instance of a Scanner. diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDataset.java b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDataset.java index 30ff1a9302f..1de91dbe0be 100644 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDataset.java +++ b/java/dataset/src/main/java/org/apache/arrow/dataset/jni/NativeDataset.java @@ -41,7 +41,7 @@ public synchronized NativeScanner newScan(ScanOptions options) { throw new NativeInstanceReleasedException(); } long scannerId = JniWrapper.get().createScanner(datasetId, options.getColumns().orElse(null), - options.getBatchSize(), context.getMemoryPool().getNativeInstanceId()); + options.getBatchSize(), options.getFilter().orElse(null), context.getMemoryPool().getNativeInstanceId()); return new NativeScanner(context, scannerId); } diff --git a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanOptions.java b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanOptions.java index f5a1af384b2..6e9aa0f53de 100644 --- a/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanOptions.java +++ b/java/dataset/src/main/java/org/apache/arrow/dataset/scanner/ScanOptions.java @@ -26,6 +26,7 @@ */ public class ScanOptions { private final Optional columns; + private final Optional filter; private final long batchSize; /** @@ -43,7 +44,7 @@ public ScanOptions(String[] columns, long batchSize) { return null; } return present; - })); + }), Optional.empty()); } /** @@ -52,19 +53,24 @@ public ScanOptions(String[] columns, long batchSize) { * @param columns (Optional) Projected columns. {@link Optional#empty()} for scanning all columns. Otherwise, * Only columns present in the Array will be scanned. */ - public ScanOptions(long batchSize, Optional columns) { + public ScanOptions(long batchSize, Optional columns, Optional filter) { + Preconditions.checkNotNull(filter) Preconditions.checkNotNull(columns); this.batchSize = batchSize; this.columns = columns; } public ScanOptions(long batchSize) { - this(batchSize, Optional.empty()); + this(batchSize, Optional.empty(), Optional.empty()); } public Optional getColumns() { return columns; } + + public Optional getFilter() { + return filter; + } public long getBatchSize() { return batchSize;