Skip to content

Commit 1c26cd0

Browse files
committed
remove usage of parking_lot, wip spilling
1 parent d270530 commit 1c26cd0

File tree

6 files changed

+215
-88
lines changed

6 files changed

+215
-88
lines changed

datafusion/Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ smallvec = { version = "1.6", features = ["union"] }
7272
rand = "0.8"
7373
avro-rs = { version = "0.13", features = ["snappy"], optional = true }
7474
num-traits = { version = "0.2", optional = true }
75-
parking_lot = "0.11.2"
7675
uuid = { version = "0.8", features = ["v4"] }
7776
tempfile = "3"
7877

datafusion/src/execution/memory_management/memory_pool.rs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,8 @@ use crate::execution::memory_management::{MemoryConsumer, MemoryConsumerId};
1919
use crate::physical_plan::aggregates::return_type;
2020
use hashbrown::HashMap;
2121
use log::{info, warn};
22-
use parking_lot::{Condvar, Mutex};
2322
use std::cmp::{max, min};
24-
use std::sync::Arc;
23+
use std::sync::{Arc, Condvar, Mutex};
2524

2625
pub(crate) trait ExecutionMemoryPool {
2726
fn memory_available(&self) -> usize;
@@ -105,19 +104,19 @@ impl ExecutionMemoryPool for ConstraintExecutionMemoryPool {
105104
}
106105

107106
fn memory_used(&self) -> usize {
108-
let a = self.memory_usage.lock();
107+
let a = self.memory_usage.lock().unwrap();
109108
a.values().sum()
110109
}
111110

112111
fn memory_used_partition(&self, partition_id: usize) -> usize {
113-
let partition_usage = self.memory_usage.lock();
112+
let partition_usage = self.memory_usage.lock().unwrap();
114113
partition_usage[partition_id].unwrap_or(0)
115114
}
116115

117116
fn acquire_memory(&self, required: usize, consumer: &dyn MemoryConsumer) -> usize {
118117
assert!(required > 0);
119118
let partition_id = consumer.partition_id();
120-
let mut partition_usage = self.memory_usage.lock();
119+
let mut partition_usage = self.memory_usage.lock().unwrap();
121120
if !partition_usage.contains_key(&partition_id) {
122121
partition_usage.entry(partition_id).or_insert(0);
123122
self.condvar.notify_all();
@@ -168,7 +167,7 @@ impl ExecutionMemoryPool for ConstraintExecutionMemoryPool {
168167
if granted_size == real_size {
169168
return;
170169
} else {
171-
let mut partition_usage = self.memory_usage.lock();
170+
let mut partition_usage = self.memory_usage.lock().unwrap();
172171
if granted_size > real_size {
173172
partition_usage.entry(consumer.partition_id()) -=
174173
granted_size - real_size;
@@ -182,7 +181,7 @@ impl ExecutionMemoryPool for ConstraintExecutionMemoryPool {
182181
}
183182

184183
fn release_memory(&self, release_size: usize, partition_id: usize) {
185-
let mut partition_usage = self.memory_usage.lock();
184+
let mut partition_usage = self.memory_usage.lock().unwrap();
186185
let current_mem = partition_usage[partition_id].unwrap_or(0);
187186
let to_free = if current_mem < release_size {
188187
warn!(
@@ -203,7 +202,7 @@ impl ExecutionMemoryPool for ConstraintExecutionMemoryPool {
203202
}
204203

205204
fn release_all(&self, partition_id: usize) -> usize {
206-
let mut partition_usage = self.memory_usage.lock();
205+
let mut partition_usage = self.memory_usage.lock().unwrap();
207206
let current_mem = partition_usage[partition_id].unwrap_or(0);
208207
if current_mem == 0 {
209208
return 0;

datafusion/src/execution/memory_management/mod.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,10 @@ use crate::execution::memory_management::memory_pool::{
2626
use async_trait::async_trait;
2727
use hashbrown::{HashMap, HashSet};
2828
use log::{debug, info};
29-
use parking_lot::Mutex;
3029
use std::fmt;
3130
use std::fmt::{Display, Formatter};
3231
use std::sync::atomic::{AtomicUsize, Ordering};
33-
use std::sync::Arc;
32+
use std::sync::{Arc, Mutex};
3433

3534
static mut CONSUMER_ID: AtomicUsize = AtomicUsize::new(0);
3635

@@ -60,7 +59,7 @@ impl MemoryManager {
6059
) -> Result<usize> {
6160
let partition_id = consumer.partition_id();
6261
let partition_manager = {
63-
let mut all_managers = self.partition_memory_manager.lock();
62+
let mut all_managers = self.partition_memory_manager.lock().unwrap();
6463
all_managers.entry(partition_id).or_insert_with(|| {
6564
PartitionMemoryManager::new(partition_id, self.clone())
6665
})
@@ -128,7 +127,7 @@ impl PartitionMemoryManager {
128127
required: usize,
129128
consumer: &dyn MemoryConsumer,
130129
) -> Result<usize> {
131-
let mut consumers = self.consumers.lock();
130+
let mut consumers = self.consumers.lock().unwrap();
132131
let mut got = self
133132
.memory_manager
134133
.acquire_exec_pool_memory(required, consumer);
@@ -154,7 +153,7 @@ impl PartitionMemoryManager {
154153

155154
pub fn show_memory_usage(&self) {
156155
info!("Memory usage for partition {}", self.partition_id);
157-
let mut consumers = self.consumers.lock();
156+
let mut consumers = self.consumers.lock().unwrap();
158157
let mut used = 0;
159158
for c in consumers.iter() {
160159
let cur_used = c.get_used();

datafusion/src/physical_plan/sorts/external_sort.rs

Lines changed: 102 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
use super::metrics::{
2121
BaselineMetrics, ExecutionPlanMetricsSet, MetricsSet, RecordOutput,
2222
};
23-
use super::{RecordBatchStream, SendableRecordBatchStream, Statistics};
2423
use crate::error::{DataFusionError, Result};
2524
use crate::execution::disk_manager::{DiskManager, PathFile};
2625
use crate::execution::memory_management::{
@@ -40,6 +39,7 @@ use crate::physical_plan::sort_preserving_merge::SortPreservingMergeStream;
4039
use crate::physical_plan::sorts::in_mem_sort::InMemSortStream;
4140
use crate::physical_plan::sorts::sort::sort_batch;
4241
use crate::physical_plan::sorts::sort_preserving_merge::SortPreservingMergeStream;
42+
use crate::physical_plan::stream::RecordBatchReceiverStream;
4343
use crate::physical_plan::{
4444
common, DisplayFormatType, Distribution, ExecutionPlan, Partitioning,
4545
RecordBatchStream, SendableRecordBatchStream, Statistics,
@@ -48,22 +48,28 @@ use arrow::compute::aggregate::estimated_bytes_size;
4848
use arrow::compute::{sort::lexsort_to_indices, take};
4949
use arrow::datatypes::SchemaRef;
5050
use arrow::error::Result as ArrowResult;
51+
use arrow::io::ipc::read::{read_file_metadata, FileReader};
5152
use arrow::record_batch::RecordBatch;
5253
use arrow::{array::ArrayRef, error::ArrowError};
5354
use async_trait::async_trait;
54-
use futures::channel::mpsc;
5555
use futures::{Future, SinkExt, Stream, StreamExt};
56-
use log::{debug, info};
57-
use parking_lot::Mutex;
56+
use log::{debug, error, info};
5857
use pin_project_lite::pin_project;
5958
use std::any::Any;
59+
use std::fs::File;
6060
use std::pin::Pin;
6161
use std::sync::atomic::{AtomicIsize, AtomicUsize, Ordering};
62-
use std::sync::Arc;
62+
use std::sync::{Arc, Mutex};
6363
use std::task::{Context, Poll};
64-
use tokio::sync::mpsc::{Receiver, Sender};
64+
use tokio::sync::mpsc::{Receiver as TKReceiver, Sender as TKSender};
6565
use tokio::task;
6666

67+
enum ExternalSortingState {
68+
Insert,
69+
OutputWithMem,
70+
OutputAllDisk,
71+
}
72+
6773
struct ExternalSorter {
6874
id: MemoryConsumerId,
6975
schema: SchemaRef,
@@ -74,6 +80,7 @@ struct ExternalSorter {
7480
spilled_count: AtomicUsize,
7581
/// Sort expressions
7682
expr: Vec<PhysicalSortExpr>,
83+
exec_state: ExternalSortingState,
7784
runtime: RuntimeEnv,
7885
}
7986

@@ -93,46 +100,26 @@ impl ExternalSorter {
93100
spilled_bytes: AtomicUsize::new(0),
94101
spilled_count: AtomicUsize::new(0),
95102
expr,
103+
exec_state: ExternalSortingState::Insert,
96104
runtime,
97105
}
98106
}
99107

100-
fn insert_batch(
101-
&mut self,
102-
input: RecordBatch,
103-
schema: SchemaRef,
104-
expr: &[PhysicalSortExpr],
105-
) -> Result<()> {
106-
let size = batch_memory_size(&input);
107-
self.allocate(size)?;
108-
// sort each batch as it's inserted, more probably to be cache-resident
109-
let sorted_batch = sort_batch(input, schema, expr)?;
110-
let mut in_mem_batches = self.in_mem_batches.lock();
111-
in_mem_batches.push(sorted_batch);
112-
}
113-
114-
fn sort(&self) {}
115-
}
116-
117-
impl MemoryConsumer for ExternalSorter {
118-
fn name(&self) -> String {
119-
"ExternalSorter".to_owned()
120-
}
121-
122-
fn id(&self) -> &MemoryConsumerId {
123-
&self.id
108+
fn output_with_mem(&mut self) {
109+
assert_eq!(self.exec_state, ExternalSortingState::Insert);
110+
self.exec_state = ExternalSortingState::OutputWithMem
124111
}
125112

126-
fn memory_manager(&self) -> Arc<MemoryManager> {
127-
self.runtime.memory_manager.clone()
113+
fn spill_during_output(&mut self) {
114+
assert_eq!(self.exec_state, ExternalSortingState::OutputWithMem);
115+
self.exec_state = ExternalSortingState::OutputAllDisk
128116
}
129117

130-
async fn spill(&self, _size: usize, _trigger: &dyn MemoryConsumer) -> Result<usize> {
131-
let in_mem_batches = self.in_mem_batches.lock();
132-
118+
async fn spill_while_inserting(&self) -> usize {
119+
let mut in_mem_batches = self.in_mem_batches.lock().unwrap();
133120
// we could always get a chance to free some memory as long as we are holding some
134121
if in_mem_batches.len() == 0 {
135-
return Ok(0);
122+
return 0;
136123
}
137124

138125
info!(
@@ -156,13 +143,50 @@ impl MemoryConsumer for ExternalSorter {
156143
.await;
157144

158145
spill(stream, path, self.schema.clone())?;
146+
*in_mem_batches = vec![];
159147

160148
{
161-
let mut spills = self.spills.lock();
149+
let mut spills = self.spills.lock().unwrap();
162150
self.spilled_count.fetch_add(1, Ordering::SeqCst);
163151
self.spilled_bytes.fetch_add(total_size, Ordering::SeqCst);
164152
spills.push(path);
165153
}
154+
total_size
155+
}
156+
157+
fn insert_batch(
158+
&mut self,
159+
input: RecordBatch,
160+
schema: SchemaRef,
161+
expr: &[PhysicalSortExpr],
162+
) -> Result<()> {
163+
let size = batch_memory_size(&input);
164+
self.allocate(size)?;
165+
// sort each batch as it's inserted, more probably to be cache-resident
166+
let sorted_batch = sort_batch(input, schema, expr)?;
167+
let mut in_mem_batches = self.in_mem_batches.lock().unwrap();
168+
in_mem_batches.push(sorted_batch);
169+
}
170+
171+
fn sort(&self) {}
172+
}
173+
174+
impl MemoryConsumer for ExternalSorter {
175+
fn name(&self) -> String {
176+
"ExternalSorter".to_owned()
177+
}
178+
179+
fn id(&self) -> &MemoryConsumerId {
180+
&self.id
181+
}
182+
183+
fn memory_manager(&self) -> Arc<MemoryManager> {
184+
self.runtime.memory_manager.clone()
185+
}
186+
187+
async fn spill(&self, _size: usize, _trigger: &dyn MemoryConsumer) -> Result<usize> {
188+
let total_size = self.spill_while_inserting().await;
189+
166190
Ok(total_size)
167191
}
168192

@@ -211,17 +235,39 @@ async fn spill(
211235
path: String,
212236
schema: SchemaRef,
213237
) -> Result<()> {
214-
let (mut sender, receiver): (Sender<RecordBatch>, Receiver<RecordBatch>) =
238+
let (mut sender, receiver): (TKSender<RecordBatch>, TKReceiver<RecordBatch>) =
215239
tokio::sync::mpsc::channel(2);
216240
while let Some(item) = sorted_stream.next().await {
217241
sender.send(item).await.ok();
218242
}
219-
task::spawn_blocking(move || write_sorted(receiver, path, schema));
243+
let path_clone = path.clone();
244+
task::spawn_blocking(move || {
245+
if let Err(e) = write_sorted(receiver, path_clone, schema) {
246+
error!("Failure while spilling to path {}. Error: {}", path, e);
247+
}
248+
});
220249
Ok(())
221250
}
222251

252+
async fn read_spill_as_stream(
253+
path: String,
254+
schema: SchemaRef,
255+
) -> Result<SendableRecordBatchStream> {
256+
let (mut sender, receiver): (
257+
TKSender<ArrowResult<RecordBatch>>,
258+
TKReceiver<ArrowResult<RecordBatch>>,
259+
) = tokio::sync::mpsc::channel(2);
260+
let path_clone = path.clone();
261+
task::spawn_blocking(move || {
262+
if let Err(e) = read_spill(sender, path_clone) {
263+
error!("Failure while reading spill file: {}. Error: {}", path, e);
264+
}
265+
});
266+
Ok(RecordBatchReceiverStream::create(&schema, receiver))
267+
}
268+
223269
fn write_sorted(
224-
mut receiver: Receiver<RecordBatch>,
270+
mut receiver: TKReceiver<RecordBatch>,
225271
path: String,
226272
schema: SchemaRef,
227273
) -> Result<()> {
@@ -237,25 +283,17 @@ fn write_sorted(
237283
Ok(())
238284
}
239285

240-
struct SpillableSortedStream {
241-
id: MemoryConsumerId,
242-
schema: SchemaRef,
243-
in_mem_batches: Mutex<Vec<RecordBatch>>,
244-
/// Sort expressions
245-
expr: Vec<PhysicalSortExpr>,
246-
runtime: RuntimeEnv,
247-
}
248-
249-
impl SpillableSortedStream {
250-
fn new() -> Self {
251-
Self {}
286+
fn read_spill(
287+
mut sender: TKSender<ArrowResult<RecordBatch>>,
288+
path: String,
289+
) -> Result<()> {
290+
let mut file = File::open(&path).map_err(|e| e.into())?;
291+
let file_meta = read_file_metadata(&mut file).map_err(|e| from_arrow_err(&e))?;
292+
let reader = FileReader::new(&mut file, file_meta, None);
293+
for batch in reader {
294+
sender.blocking_send(batch)?;
252295
}
253-
254-
fn memory_used(&self) -> usize {}
255-
256-
fn get_sorted_stream(&self) {}
257-
258-
fn spill_remaining(&self) {}
296+
Ok(())
259297
}
260298

261299
/// Sort execution plan
@@ -402,14 +440,9 @@ impl ExecutionPlan for ExternalSortExec {
402440
}
403441
}
404442

405-
pin_project! {
406-
/// stream for sort plan
407-
struct ExternalSortStream {
408-
#[pin]
409-
output: futures::channel::oneshot::Receiver<ArrowResult<Option<RecordBatch>>>,
410-
finished: bool,
411-
schema: SchemaRef,
412-
}
443+
/// stream for sort plan
444+
struct ExternalSortStream {
445+
schema: SchemaRef,
413446
}
414447

415448
impl ExternalSortStream {
@@ -492,6 +525,7 @@ mod tests {
492525
use crate::physical_plan::coalesce_partitions::CoalescePartitionsExec;
493526
use crate::physical_plan::expressions::col;
494527
use crate::physical_plan::memory::MemoryExec;
528+
use crate::physical_plan::sorts::SortOptions;
495529
use crate::physical_plan::{
496530
collect,
497531
csv::{CsvExec, CsvReadOptions},
@@ -661,3 +695,5 @@ mod tests {
661695
Ok(())
662696
}
663697
}
698+
699+
impl ExternalSorter {}

0 commit comments

Comments
 (0)