Skip to content

Commit 2e6eb0c

Browse files
committed
ExternalSortExec v1
1 parent 1c26cd0 commit 2e6eb0c

File tree

12 files changed

+456
-307
lines changed

12 files changed

+456
-307
lines changed

datafusion/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ path = "src/lib.rs"
4040
default = ["crypto_expressions", "regex_expressions", "unicode_expressions"]
4141
simd = ["arrow/simd"]
4242
crypto_expressions = ["md-5", "sha2"]
43-
regex_expressions = ["regex", "lazy_static"]
43+
regex_expressions = ["regex"]
4444
unicode_expressions = ["unicode-segmentation"]
4545
# Used for testing ONLY: causes all values to hash to the same value (test for collisions)
4646
force_hash_collisions = []
@@ -67,7 +67,7 @@ sha2 = { version = "^0.9.1", optional = true }
6767
ordered-float = "2.0"
6868
unicode-segmentation = { version = "^1.7.1", optional = true }
6969
regex = { version = "^1.4.3", optional = true }
70-
lazy_static = { version = "^1.4.0", optional = true }
70+
lazy_static = { version = "^1.4.0"}
7171
smallvec = { version = "1.6", features = ["union"] }
7272
rand = "0.8"
7373
avro-rs = { version = "0.13", features = ["snappy"], optional = true }

datafusion/src/execution/disk_manager.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ fn get_file(file_name: &str, local_dirs: &Vec<String>) -> String {
7878
let mut hasher = DefaultHasher::new();
7979
file_name.hash(&mut hasher);
8080
let hash = hasher.finish();
81-
let dir = local_dirs[hash.rem_euclid(local_dirs.len() as u64)];
81+
let dir = &local_dirs[hash.rem_euclid(local_dirs.len() as u64) as usize];
8282
let mut path = PathBuf::new();
8383
path.push(dir);
8484
path.push(file_name);
@@ -88,9 +88,9 @@ fn get_file(file_name: &str, local_dirs: &Vec<String>) -> String {
8888
fn create_tmp_file(local_dirs: &Vec<String>) -> Result<String> {
8989
let name = Uuid::new_v4().to_string();
9090
let mut path = get_file(&*name, local_dirs);
91-
while path.exists() {
91+
while Path::new(path.as_str()).exists() {
9292
path = get_file(&*Uuid::new_v4().to_string(), local_dirs);
9393
}
94-
File::create(&path).map_err(|e| e.into())?;
94+
File::create(&path)?;
9595
Ok(path)
9696
}

datafusion/src/execution/memory_management/memory_pool.rs

Lines changed: 62 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,23 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use crate::execution::memory_management::{MemoryConsumer, MemoryConsumerId};
19-
use crate::physical_plan::aggregates::return_type;
18+
use crate::execution::memory_management::MemoryConsumer;
2019
use hashbrown::HashMap;
2120
use log::{info, warn};
22-
use std::cmp::{max, min};
21+
use std::cmp::min;
22+
use std::fmt;
23+
use std::fmt::{Debug, Formatter};
2324
use std::sync::{Arc, Condvar, Mutex};
2425

25-
pub(crate) trait ExecutionMemoryPool {
26+
pub(crate) trait ExecutionMemoryPool: Sync + Send + Debug {
2627
fn memory_available(&self) -> usize;
2728
fn memory_used(&self) -> usize;
2829
fn memory_used_partition(&self, partition_id: usize) -> usize;
29-
fn acquire_memory(&self, required: usize, consumer: &dyn MemoryConsumer) -> usize;
30+
fn acquire_memory(
31+
&self,
32+
required: usize,
33+
consumer: &Arc<dyn MemoryConsumer>,
34+
) -> usize;
3035
fn update_usage(
3136
&self,
3237
granted_size: usize,
@@ -49,6 +54,14 @@ impl DummyExecutionMemoryPool {
4954
}
5055
}
5156

57+
impl Debug for DummyExecutionMemoryPool {
58+
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
59+
f.debug_struct("DummyExecutionMemoryPool")
60+
.field("total", &self.pool_size)
61+
.finish()
62+
}
63+
}
64+
5265
impl ExecutionMemoryPool for DummyExecutionMemoryPool {
5366
fn memory_available(&self) -> usize {
5467
usize::MAX
@@ -62,7 +75,11 @@ impl ExecutionMemoryPool for DummyExecutionMemoryPool {
6275
0
6376
}
6477

65-
fn acquire_memory(&self, required: usize, _consumer: &dyn MemoryConsumer) -> usize {
78+
fn acquire_memory(
79+
&self,
80+
required: usize,
81+
_consumer: &Arc<dyn MemoryConsumer>,
82+
) -> usize {
6683
required
6784
}
6885

@@ -98,6 +115,15 @@ impl ConstraintExecutionMemoryPool {
98115
}
99116
}
100117

118+
impl Debug for ConstraintExecutionMemoryPool {
119+
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
120+
f.debug_struct("ConstraintExecutionMemoryPool")
121+
.field("total", &self.pool_size)
122+
.field("used", &self.memory_used())
123+
.finish()
124+
}
125+
}
126+
101127
impl ExecutionMemoryPool for ConstraintExecutionMemoryPool {
102128
fn memory_available(&self) -> usize {
103129
self.pool_size - self.memory_used()
@@ -110,10 +136,17 @@ impl ExecutionMemoryPool for ConstraintExecutionMemoryPool {
110136

111137
fn memory_used_partition(&self, partition_id: usize) -> usize {
112138
let partition_usage = self.memory_usage.lock().unwrap();
113-
partition_usage[partition_id].unwrap_or(0)
139+
match partition_usage.get(&partition_id) {
140+
None => 0,
141+
Some(v) => *v,
142+
}
114143
}
115144

116-
fn acquire_memory(&self, required: usize, consumer: &dyn MemoryConsumer) -> usize {
145+
fn acquire_memory(
146+
&self,
147+
required: usize,
148+
consumer: &Arc<dyn MemoryConsumer>,
149+
) -> usize {
117150
assert!(required > 0);
118151
let partition_id = consumer.partition_id();
119152
let mut partition_usage = self.memory_usage.lock().unwrap();
@@ -138,7 +171,7 @@ impl ExecutionMemoryPool for ConstraintExecutionMemoryPool {
138171
Some(max_available) => min(required, max_available),
139172
};
140173

141-
let total_used = partition_usage.values().sum();
174+
let total_used: usize = partition_usage.values().sum();
142175
let total_available = self.pool_size - total_used;
143176
// Only give it as much memory as is free, which might be none if it reached 1 / num_active_partition
144177
let to_grant = min(max_grant, total_available);
@@ -147,8 +180,11 @@ impl ExecutionMemoryPool for ConstraintExecutionMemoryPool {
147180
// if we can't give it this much now, wait for other tasks to free up memory
148181
// (this happens if older tasks allocated lots of memory before N grew)
149182
if to_grant < required && current_mem + to_grant < min_memory_per_partition {
150-
info!("{} waiting for at least 1/2N of pool to be free", consumer);
151-
self.condvar.wait(&mut partition_usage);
183+
info!(
184+
"{:?} waiting for at least 1/2N of pool to be free",
185+
consumer
186+
);
187+
self.condvar.wait(partition_usage);
152188
} else {
153189
*partition_usage.entry(partition_id).or_insert(0) += to_grant;
154190
return to_grant;
@@ -169,20 +205,24 @@ impl ExecutionMemoryPool for ConstraintExecutionMemoryPool {
169205
} else {
170206
let mut partition_usage = self.memory_usage.lock().unwrap();
171207
if granted_size > real_size {
172-
partition_usage.entry(consumer.partition_id()) -=
208+
*partition_usage.entry(consumer.partition_id()).or_insert(0) -=
173209
granted_size - real_size;
174210
} else {
175211
// TODO: this would have caused OOM already if size estimation ahead is much smaller than
176212
// that of actual allocation
177-
partition_usage.entry(consumer.partition_id()) +=
213+
*partition_usage.entry(consumer.partition_id()).or_insert(0) +=
178214
real_size - granted_size;
179215
}
180216
}
181217
}
182218

183219
fn release_memory(&self, release_size: usize, partition_id: usize) {
184220
let mut partition_usage = self.memory_usage.lock().unwrap();
185-
let current_mem = partition_usage[partition_id].unwrap_or(0);
221+
let current_mem = match partition_usage.get(&partition_id) {
222+
None => 0,
223+
Some(v) => *v,
224+
};
225+
186226
let to_free = if current_mem < release_size {
187227
warn!(
188228
"Release called to free {} but partition only holds {} from the pool",
@@ -193,8 +233,9 @@ impl ExecutionMemoryPool for ConstraintExecutionMemoryPool {
193233
release_size
194234
};
195235
if partition_usage.contains_key(&partition_id) {
196-
partition_usage.entry(partition_id) -= to_free;
197-
if partition_usage[partition_id].unwrap() == 0 {
236+
let entry = partition_usage.entry(partition_id).or_insert(0);
237+
*entry -= to_free;
238+
if *entry == 0 {
198239
partition_usage.remove(&partition_id);
199240
}
200241
}
@@ -203,10 +244,12 @@ impl ExecutionMemoryPool for ConstraintExecutionMemoryPool {
203244

204245
fn release_all(&self, partition_id: usize) -> usize {
205246
let mut partition_usage = self.memory_usage.lock().unwrap();
206-
let current_mem = partition_usage[partition_id].unwrap_or(0);
207-
if current_mem == 0 {
208-
return 0;
247+
let mut current_mem = 0;
248+
match partition_usage.get(&partition_id) {
249+
None => return 0,
250+
Some(v) => current_mem = *v,
209251
}
252+
210253
partition_usage.remove(&partition_id);
211254
self.condvar.notify_all();
212255
return current_mem;

datafusion/src/execution/memory_management/mod.rs

Lines changed: 29 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,16 @@
1818
pub mod memory_pool;
1919

2020
use crate::error::DataFusionError::OutOfMemory;
21-
use crate::error::{DataFusionError, Result};
22-
use crate::execution::disk_manager::DiskManager;
21+
use crate::error::Result;
2322
use crate::execution::memory_management::memory_pool::{
2423
ConstraintExecutionMemoryPool, DummyExecutionMemoryPool, ExecutionMemoryPool,
2524
};
2625
use async_trait::async_trait;
2726
use hashbrown::{HashMap, HashSet};
2827
use log::{debug, info};
28+
use std::borrow::BorrowMut;
2929
use std::fmt;
30-
use std::fmt::{Display, Formatter};
30+
use std::fmt::{Debug, Display, Formatter};
3131
use std::sync::atomic::{AtomicUsize, Ordering};
3232
use std::sync::{Arc, Mutex};
3333

@@ -41,21 +41,21 @@ pub struct MemoryManager {
4141

4242
impl MemoryManager {
4343
pub fn new(exec_pool_size: usize) -> Self {
44-
let pool: dyn ExecutionMemoryPool = if exec_pool_size == usize::MAX {
45-
DummyExecutionMemoryPool::new()
44+
let execution_pool = if exec_pool_size == usize::MAX {
45+
Arc::new(DummyExecutionMemoryPool::new() as dyn ExecutionMemoryPool)
4646
} else {
47-
ConstraintExecutionMemoryPool::new(exec_pool_size)
47+
Arc::new(ConstraintExecutionMemoryPool::new(exec_pool_size))
4848
};
4949
Self {
50-
execution_pool: Arc::new(pool),
50+
execution_pool,
5151
partition_memory_manager: Arc::new(Mutex::new(HashMap::new())),
5252
}
5353
}
5454

5555
pub fn acquire_exec_memory(
5656
self: Arc<Self>,
5757
required: usize,
58-
consumer: &dyn MemoryConsumer,
58+
consumer: Arc<dyn MemoryConsumer>,
5959
) -> Result<usize> {
6060
let partition_id = consumer.partition_id();
6161
let partition_manager = {
@@ -70,7 +70,7 @@ impl MemoryManager {
7070
pub fn acquire_exec_pool_memory(
7171
&self,
7272
required: usize,
73-
consumer: &dyn MemoryConsumer,
73+
consumer: &Arc<dyn MemoryConsumer>,
7474
) -> usize {
7575
self.execution_pool.acquire_memory(required, consumer)
7676
}
@@ -110,7 +110,7 @@ fn next_id() -> usize {
110110
pub struct PartitionMemoryManager {
111111
memory_manager: Arc<MemoryManager>,
112112
partition_id: usize,
113-
consumers: Arc<Mutex<HashSet<dyn MemoryConsumer>>>,
113+
consumers: Arc<Mutex<HashSet<Arc<dyn MemoryConsumer>>>>,
114114
}
115115

116116
impl PartitionMemoryManager {
@@ -125,12 +125,12 @@ impl PartitionMemoryManager {
125125
pub fn acquire_exec_memory(
126126
&mut self,
127127
required: usize,
128-
consumer: &dyn MemoryConsumer,
128+
consumer: Arc<dyn MemoryConsumer>,
129129
) -> Result<usize> {
130-
let mut consumers = self.consumers.lock().unwrap();
130+
let mut consumers = self.consumers.lock().unwrap().borrow_mut();
131131
let mut got = self
132132
.memory_manager
133-
.acquire_exec_pool_memory(required, consumer);
133+
.acquire_exec_pool_memory(required, &consumer);
134134
if got < required {
135135
// spill others first
136136
}
@@ -162,14 +162,14 @@ impl PartitionMemoryManager {
162162
info!(
163163
"Consumer {} acquired {}",
164164
c.str_repr(),
165-
human_readable_size(cur_used)
165+
human_readable_size(cur_used as usize)
166166
)
167167
}
168168
}
169169
let no_consumer_size = self
170170
.memory_manager
171171
.exec_memory_used_for_partition(self.partition_id)
172-
- used;
172+
- (used as usize);
173173
info!(
174174
"{} bytes of memory were used for partition {} without specific consumer",
175175
human_readable_size(no_consumer_size),
@@ -178,10 +178,10 @@ impl PartitionMemoryManager {
178178
}
179179
}
180180

181-
#[derive(Debug, Clone)]
181+
#[derive(Clone, Debug)]
182182
pub struct MemoryConsumerId {
183-
partition_id: usize,
184-
id: usize,
183+
pub partition_id: usize,
184+
pub id: usize,
185185
}
186186

187187
impl MemoryConsumerId {
@@ -198,20 +198,22 @@ impl Display for MemoryConsumerId {
198198
}
199199

200200
#[async_trait]
201-
pub trait MemoryConsumer {
201+
pub trait MemoryConsumer: Send + Sync + Debug {
202202
/// Display name of the consumer
203203
fn name(&self) -> String;
204204
/// Unique id of the consumer
205205
fn id(&self) -> &MemoryConsumerId;
206206

207207
fn memory_manager(&self) -> Arc<MemoryManager>;
208208
/// Partition that the consumer belongs to
209-
fn partition_id(&self) -> uszie {
209+
fn partition_id(&self) -> usize {
210210
self.id().partition_id
211211
}
212212
/// Try to allocate `required` bytes as needed
213-
fn allocate(&self, required: usize) -> Result<()> {
214-
let got = self.memory_manager().acquire_exec_memory(required, self)?;
213+
fn allocate(self: Arc<Self>, required: usize) -> Result<()> {
214+
let got = self
215+
.memory_manager()
216+
.acquire_exec_memory(required, self.clone())?;
215217
self.update_used(got as isize);
216218
Ok(())
217219
}
@@ -250,15 +252,15 @@ fn human_readable_size(size: usize) -> String {
250252
let size = size as u64;
251253
let (value, unit) = {
252254
if size >= 2 * TB {
253-
(size as f64 / TB, "TB")
255+
(size as f64 / TB as f64, "TB")
254256
} else if size >= 2 * GB {
255-
(size as f64 / GB, "GB")
257+
(size as f64 / GB as f64, "GB")
256258
} else if size >= 2 * MB {
257-
(size as f64 / MB, "MB")
259+
(size as f64 / MB as f64, "MB")
258260
} else if size >= 2 * KB {
259-
(size as f64 / KB, "KB")
261+
(size as f64 / KB as f64, "KB")
260262
} else {
261-
(size, "B")
263+
(size as f64, "B")
262264
}
263265
};
264266
format!("{:.1} {}", value, unit)

0 commit comments

Comments
 (0)