Skip to content

Commit ec3d20b

Browse files
authored
Adds instrumentation to delimited LIST operations in CLI (#18134)
## Which issue does this PR close?

This does not fully close, but is an incremental building block component for:
- #17207

The full context of how this code is likely to progress can be seen in the POC for this effort:
- #17266

## Rationale for this change

Continued progress filling out methods that are instrumented by the instrumented object store

## What changes are included in this PR?

- Adds instrumentation around delimited list operations into the instrumented object store
- Adds test cases for the new code

## Are these changes tested?

Yes, unit tests have been added.

Example output:

```sql
DataFusion CLI v50.2.0
> CREATE EXTERNAL TABLE overture_partitioned STORED AS PARQUET LOCATION 's3://overturemaps-us-west-2/release/2025-09-24.0/theme=addresses/';
0 row(s) fetched.
Elapsed 2.307 seconds.

> \object_store_profiling trace
ObjectStore Profile mode set to Trace
> select count(*) from overture_partitioned;
+-----------+
| count(*)  |
+-----------+
| 446544475 |
+-----------+
1 row(s) fetched.
Elapsed 1.932 seconds.

Object Store Profiling
Instrumented Object Store: instrument_mode: Trace, inner: AmazonS3(overturemaps-us-west-2)
2025-10-17T17:05:27.922724180+00:00 operation=List duration=0.132154s path=release/2025-09-24.0/theme=addresses
2025-10-17T17:05:28.054894440+00:00 operation=List duration=0.049048s path=release/2025-09-24.0/theme=addresses/type=address
2025-10-17T17:05:28.104233937+00:00 operation=Get duration=0.053522s size=8 range: bytes=1070778162-1070778169 path=release/2025-09-24.0/theme=addresses/type=address/part-00000-52872134-68de-44a6-822d-15fa29a0f606-c000.zstd.parquet
2025-10-17T17:05:28.106862343+00:00 operation=Get duration=0.108103s size=8 range: bytes=1017940335-1017940342 path=release/2025-09-24.0/theme=addresses/type=address/part-00003-52872134-68de-44a6-822d-15fa29a0f606-c000.zstd.parquet
...
2025-10-17T17:05:28.589084204+00:00 operation=Get duration=0.084737s size=836971 range: bytes=1112791717-1113628687 path=release/2025-09-24.0/theme=addresses/type=address/part-00009-52872134-68de-44a6-822d-15fa29a0f606-c000.zstd.parquet

Summaries:
List
count: 2
duration min: 0.049048s
duration max: 0.132154s
duration avg: 0.090601s

Get
count: 33
duration min: 0.045500s
duration max: 0.162114s
duration avg: 0.089775s
size min: 8 B
size max: 917946 B
size avg: 336000 B
size sum: 11088026 B
>
```

Note that a `LIST` report showing a duration must be a `list_with_delimiter()` call because a standard `list` call does not currently report a duration.

## Are there any user-facing changes?

No-ish

cc @alamb
1 parent 7d294f1 commit ec3d20b

File tree

1 file changed

+49
-0
lines changed

1 file changed

+49
-0
lines changed

datafusion-cli/src/object_storage/instrumented.rs

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,28 @@ impl InstrumentedObjectStore {
163163

164164
ret
165165
}
166+
167+
async fn instrumented_list_with_delimiter(
168+
&self,
169+
prefix: Option<&Path>,
170+
) -> Result<ListResult> {
171+
let timestamp = Utc::now();
172+
let start = Instant::now();
173+
let ret = self.inner.list_with_delimiter(prefix).await?;
174+
let elapsed = start.elapsed();
175+
176+
self.requests.lock().push(RequestDetails {
177+
op: Operation::List,
178+
path: prefix.cloned().unwrap_or_else(|| Path::from("")),
179+
timestamp,
180+
duration: Some(elapsed),
181+
size: None,
182+
range: None,
183+
extra_display: None,
184+
});
185+
186+
Ok(ret)
187+
}
166188
}
167189

168190
impl fmt::Display for InstrumentedObjectStore {
@@ -217,6 +239,10 @@ impl ObjectStore for InstrumentedObjectStore {
217239
}
218240

219241
/// Lists the objects and common prefixes under `prefix` via the wrapped store.
///
/// When instrumentation is enabled the call is routed through
/// `instrumented_list_with_delimiter`; otherwise it is passed straight
/// to the inner store.
async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult> {
    // Plain pass-through when instrumentation is switched off.
    if !self.enabled() {
        return self.inner.list_with_delimiter(prefix).await;
    }

    self.instrumented_list_with_delimiter(prefix).await
}
222248

@@ -569,6 +595,29 @@ mod tests {
569595
assert!(request.extra_display.is_none());
570596
}
571597

598+
#[tokio::test]
599+
async fn instrumented_store_list_with_delimiter() {
600+
let (instrumented, path) = setup_test_store().await;
601+
602+
// By default no requests should be instrumented/stored
603+
assert!(instrumented.requests.lock().is_empty());
604+
let _ = instrumented.list_with_delimiter(Some(&path)).await.unwrap();
605+
assert!(instrumented.requests.lock().is_empty());
606+
607+
instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Trace);
608+
assert!(instrumented.requests.lock().is_empty());
609+
let _ = instrumented.list_with_delimiter(Some(&path)).await.unwrap();
610+
assert_eq!(instrumented.requests.lock().len(), 1);
611+
612+
let request = instrumented.take_requests().pop().unwrap();
613+
assert_eq!(request.op, Operation::List);
614+
assert_eq!(request.path, path);
615+
assert!(request.duration.is_some());
616+
assert!(request.size.is_none());
617+
assert!(request.range.is_none());
618+
assert!(request.extra_display.is_none());
619+
}
620+
572621
#[test]
573622
fn request_details() {
574623
let rd = RequestDetails {

0 commit comments

Comments
 (0)