@@ -451,9 +451,11 @@ Status ValidateAndPrepareSchema(const WriteNodeOptions& write_node_options,
451451class DatasetWritingSinkNodeConsumer : public acero ::SinkNodeConsumer {
452452 public:
453453 DatasetWritingSinkNodeConsumer (std::shared_ptr<Schema> custom_schema,
454- FileSystemDatasetWriteOptions write_options)
454+ FileSystemDatasetWriteOptions write_options,
455+ uint64_t max_rows_queued)
455456 : custom_schema_(std::move(custom_schema)),
456- write_options_ (std::move(write_options)) {}
457+ write_options_ (std::move(write_options)),
458+ max_rows_queued_(max_rows_queued) {}
457459
458460 Status Init (const std::shared_ptr<Schema>& schema,
459461 acero::BackpressureControl* backpressure_control,
@@ -463,12 +465,12 @@ class DatasetWritingSinkNodeConsumer : public acero::SinkNodeConsumer {
463465 } else {
464466 schema_ = schema;
465467 }
466- ARROW_ASSIGN_OR_RAISE (
467- dataset_writer_,
468- internal::DatasetWriter::Make (
469- write_options_, plan-> query_context ()-> async_scheduler () ,
470- [backpressure_control] { backpressure_control->Pause (); },
471- [backpressure_control] { backpressure_control-> Resume (); }, [] {}));
468+ ARROW_ASSIGN_OR_RAISE (dataset_writer_,
469+ internal::DatasetWriter::Make (
470+ write_options_, plan-> query_context ()-> async_scheduler (),
471+ [backpressure_control] { backpressure_control-> Pause (); } ,
472+ [backpressure_control] { backpressure_control->Resume (); },
473+ [] {}, max_rows_queued_ ));
472474 return Status::OK ();
473475 }
474476
@@ -500,6 +502,7 @@ class DatasetWritingSinkNodeConsumer : public acero::SinkNodeConsumer {
500502 std::shared_ptr<Schema> custom_schema_;
501503 std::unique_ptr<internal::DatasetWriter> dataset_writer_;
502504 FileSystemDatasetWriteOptions write_options_;
505+ uint64_t max_rows_queued_;
503506 Future<> finished_ = Future<>::Make();
504507 std::shared_ptr<Schema> schema_ = nullptr ;
505508};
@@ -551,8 +554,9 @@ Result<acero::ExecNode*> MakeWriteNode(acero::ExecPlan* plan,
551554 ValidateAndPrepareSchema (write_node_options, input_schema, custom_schema));
552555
553556 std::shared_ptr<DatasetWritingSinkNodeConsumer> consumer =
554- std::make_shared<DatasetWritingSinkNodeConsumer>(custom_schema,
555- write_node_options.write_options );
557+ std::make_shared<DatasetWritingSinkNodeConsumer>(
558+ custom_schema, write_node_options.write_options ,
559+ write_node_options.max_rows_queued );
556560 ARROW_ASSIGN_OR_RAISE (
557561 auto node,
558562 // to preserve order explicitly sequence the exec batches
@@ -574,20 +578,21 @@ class TeeNode : public acero::MapNode,
574578 public:
575579 TeeNode (acero::ExecPlan* plan, std::vector<acero::ExecNode*> inputs,
576580 std::shared_ptr<Schema> output_schema,
577- FileSystemDatasetWriteOptions write_options)
581+ FileSystemDatasetWriteOptions write_options, uint64_t max_rows_queued )
578582 : MapNode(plan, std::move(inputs), std::move(output_schema)),
579- write_options_ (std::move(write_options)) {
583+ write_options_ (std::move(write_options)),
584+ max_rows_queued_(max_rows_queued) {
580585 if (write_options.preserve_order ) {
581586 sequencer_ = acero::util::SerialSequencingQueue::Make (this );
582587 }
583588 }
584589
585590 Status StartProducing () override {
586- ARROW_ASSIGN_OR_RAISE (
587- dataset_writer_,
588- internal::DatasetWriter::Make (
589- write_options_, plan_-> query_context ()-> async_scheduler () ,
590- [ this ] { Pause (); }, [ this ] { Resume (); }, [this ] { MapNode::Finish (); }));
591+ ARROW_ASSIGN_OR_RAISE (dataset_writer_,
592+ internal::DatasetWriter::Make (
593+ write_options_, plan_-> query_context ()-> async_scheduler (),
594+ [ this ] { Pause (); }, [ this ] { Resume (); } ,
595+ [this ] { MapNode::Finish (); }, max_rows_queued_ ));
591596 return MapNode::StartProducing ();
592597 }
593598
@@ -605,7 +610,8 @@ class TeeNode : public acero::MapNode,
605610 ValidateAndPrepareSchema (write_node_options, input_schema, custom_schema));
606611
607612 return plan->EmplaceNode <TeeNode>(plan, std::move (inputs), std::move (custom_schema),
608- std::move (write_node_options.write_options ));
613+ std::move (write_node_options.write_options ),
614+ write_node_options.max_rows_queued );
609615 }
610616
611617 const char * kind_name () const override { return " TeeNode" ; }
@@ -654,6 +660,7 @@ class TeeNode : public acero::MapNode,
654660 private:
655661 std::unique_ptr<internal::DatasetWriter> dataset_writer_;
656662 FileSystemDatasetWriteOptions write_options_;
663+ uint64_t max_rows_queued_;
657664 std::atomic<int32_t > backpressure_counter_ = 0 ;
658665 std::unique_ptr<acero::util::SerialSequencingQueue> sequencer_;
659666};
0 commit comments