Skip to content

Commit daec6d9

Browse files
author
Rafał Hibner
committed
Merge branch 'arrowPipeNoQueue' into combined
2 parents 909fbd9 + b654bce commit daec6d9

File tree

3 files changed

+55
-17
lines changed

3 files changed

+55
-17
lines changed

cpp/src/arrow/acero/options.h

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -885,18 +885,32 @@ class ARROW_ACERO_EXPORT PipeSourceNodeOptions : public ExecNodeOptions {
885885
output_schema(std::move(output_schema)),
886886
ordering(std::move(ordering)) {}
887887

888-
/// \brief List of declarations that will receive duplicated ExecBatches
888+
/// \brief Pipe name used to match with pipe sink
889889
std::string pipe_name;
890+
891+
/// \brief Expected schema of data. Validated during initialization.
890892
std::shared_ptr<Schema> output_schema;
893+
894+
/// \brief Expected ordering of data. Validated during initialization.
891895
Ordering ordering;
892896
};
893897

894898
class ARROW_ACERO_EXPORT PipeSinkNodeOptions : public ExecNodeOptions {
895899
public:
896900
PipeSinkNodeOptions(std::string pipe_name) : pipe_name(std::move(pipe_name)) {}
897901

898-
/// \brief List of declarations that will receive duplicated ExecBatches
902+
/// \brief Pipe name used to match with pipe sources
899903
std::string pipe_name;
904+
905+
/// \brief pause_on_any controls pausing strategy. If true sink input will be paused
906+
/// when any source is paused. If false sink input will be paused hen all sources are
907+
/// paused
908+
bool pause_on_any{true};
909+
910+
/// \brief stop_on_any controls stopping strategy. If true sink input will be stopped
911+
/// when any source is stopped. If false sink input will be stopped hen all sources are
912+
/// stopped
913+
bool stop_on_any{false};
900914
};
901915

902916
/// @}

cpp/src/arrow/acero/pipe_node.cc

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -113,20 +113,23 @@ class PipeSinkBackpressureControl : public BackpressureControl {
113113

114114
class PipeSinkNode : public ExecNode {
115115
public:
116-
PipeSinkNode(ExecPlan* plan, std::vector<ExecNode*> inputs, std::string pipe_name)
116+
PipeSinkNode(ExecPlan* plan, std::vector<ExecNode*> inputs, std::string pipe_name,
117+
bool pause_on_any, bool stop_on_any)
117118
: ExecNode(plan, inputs, /*input_labels=*/{pipe_name}, {}) {
118119
pipe_ = std::make_shared<Pipe>(
119120
plan, std::move(pipe_name),
120121
std::make_unique<PipeSinkBackpressureControl>(inputs[0], this),
121-
[this]() { return StopProducing(); }, inputs[0]->ordering());
122+
[this]() { return StopProducing(); }, inputs[0]->ordering(), pause_on_any,
123+
stop_on_any);
122124
}
123125

124126
static Result<ExecNode*> Make(ExecPlan* plan, std::vector<ExecNode*> inputs,
125127
const ExecNodeOptions& options) {
126128
RETURN_NOT_OK(ValidateExecNodeInputs(plan, inputs, 1, "PipeSinkNode"));
127129
const auto& pipe_tee_options = checked_cast<const PipeSinkNodeOptions&>(options);
128-
return plan->EmplaceNode<PipeSinkNode>(plan, std::move(inputs),
129-
pipe_tee_options.pipe_name);
130+
return plan->EmplaceNode<PipeSinkNode>(
131+
plan, std::move(inputs), pipe_tee_options.pipe_name,
132+
pipe_tee_options.pause_on_any, pipe_tee_options.stop_on_any);
130133
}
131134
static const char kKindName[];
132135
const char* kind_name() const override { return kKindName; }
@@ -172,8 +175,9 @@ const char PipeSinkNode::kKindName[] = "PipeSinkNode";
172175

173176
class PipeTeeNode : public PipeSource, public PipeSinkNode {
174177
public:
175-
PipeTeeNode(ExecPlan* plan, std::vector<ExecNode*> inputs, std::string pipe_name)
176-
: PipeSinkNode(plan, inputs, pipe_name) {
178+
PipeTeeNode(ExecPlan* plan, std::vector<ExecNode*> inputs, std::string pipe_name,
179+
bool pause_on_any, bool stop_on_any)
180+
: PipeSinkNode(plan, inputs, pipe_name, pause_on_any, stop_on_any) {
177181
output_schema_ = inputs[0]->output_schema();
178182
auto st = PipeSinkNode::pipe_->addSyncSource(this);
179183
if (ARROW_PREDICT_FALSE(!st.ok())) { // this should never happen
@@ -185,8 +189,9 @@ class PipeTeeNode : public PipeSource, public PipeSinkNode {
185189
const ExecNodeOptions& options) {
186190
RETURN_NOT_OK(ValidateExecNodeInputs(plan, inputs, 1, "PipeTeeNode"));
187191
const auto& pipe_tee_options = checked_cast<const PipeSinkNodeOptions&>(options);
188-
return plan->EmplaceNode<PipeTeeNode>(plan, std::move(inputs),
189-
pipe_tee_options.pipe_name);
192+
return plan->EmplaceNode<PipeTeeNode>(
193+
plan, std::move(inputs), pipe_tee_options.pipe_name,
194+
pipe_tee_options.pause_on_any, pipe_tee_options.stop_on_any);
190195
}
191196
static const char kKindName[];
192197
const char* kind_name() const override { return kKindName; }
@@ -255,12 +260,14 @@ Status PipeSource::Validate(const Ordering& ordering) {
255260

256261
Pipe::Pipe(ExecPlan* plan, std::string pipe_name,
257262
std::unique_ptr<BackpressureControl> ctrl,
258-
std::function<Status()> stopProducing, Ordering ordering, bool stop_on_any)
263+
std::function<Status()> stopProducing, Ordering ordering, bool pause_on_any,
264+
bool stop_on_any)
259265
: plan_(plan),
260266
ordering_(ordering),
261267
pipe_name_(pipe_name),
262268
ctrl_(std::move(ctrl)),
263269
stopProducing_(stopProducing),
270+
pause_on_any_(pause_on_any),
264271
stop_on_any_(stop_on_any) {}
265272

266273
const Ordering& Pipe::ordering() const { return ordering_; }
@@ -269,8 +276,15 @@ void Pipe::Pause(PipeSource* output, int counter) {
269276
std::lock_guard<std::mutex> lg(mutex_);
270277
if (!paused_[output]) {
271278
paused_[output] = true;
272-
if (0 == paused_count_++) {
273-
ctrl_->Pause();
279+
size_t paused_count = ++paused_count_;
280+
if (pause_on_any_) {
281+
if (paused_count == 1) {
282+
ctrl_->Pause();
283+
}
284+
} else {
285+
if (paused_count == CountSources()) {
286+
ctrl_->Pause();
287+
}
274288
}
275289
}
276290
}
@@ -279,8 +293,16 @@ void Pipe::Resume(PipeSource* output, int counter) {
279293
std::lock_guard<std::mutex> lg(mutex_);
280294
if (paused_[output]) {
281295
paused_[output] = false;
282-
if (0 == --paused_count_) {
283-
ctrl_->Resume();
296+
297+
size_t paused_count = --paused_count_;
298+
if (pause_on_any_) {
299+
if (paused_count == 0) {
300+
ctrl_->Resume();
301+
}
302+
} else {
303+
if (paused_count == CountSources() - 1) {
304+
ctrl_->Resume();
305+
}
284306
}
285307
}
286308
}

cpp/src/arrow/acero/pipe_node.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ class ARROW_ACERO_EXPORT Pipe {
5656
public:
5757
Pipe(ExecPlan* plan, std::string pipe_name, std::unique_ptr<BackpressureControl> ctrl,
5858
std::function<Status()> stopProducing, Ordering ordering = Ordering::Unordered(),
59-
bool stop_on_any = false);
59+
bool pause_on_any = true, bool stop_on_any = false);
6060

6161
const Ordering& ordering() const;
6262

@@ -110,11 +110,13 @@ class ARROW_ACERO_EXPORT Pipe {
110110
// backpressure
111111
std::unordered_map<PipeSource*, bool> paused_;
112112
std::mutex mutex_;
113-
std::atomic<int32_t> paused_count_;
113+
std::atomic_size_t paused_count_;
114114
std::unique_ptr<BackpressureControl> ctrl_;
115115
// stopProducing
116116
std::atomic_size_t stopped_count_;
117117
std::function<Status()> stopProducing_;
118+
119+
const bool pause_on_any_;
118120
const bool stop_on_any_;
119121
};
120122

0 commit comments

Comments
 (0)