diff --git a/datafusion-cli/src/command.rs b/datafusion-cli/src/command.rs index 48fb37e8a888..3fbfe5680cfc 100644 --- a/datafusion-cli/src/command.rs +++ b/datafusion-cli/src/command.rs @@ -128,7 +128,7 @@ impl Command { let profile_mode = mode .parse() .map_err(|_| - exec_datafusion_err!("Failed to parse input: {mode}. Valid options are disabled, enabled") + exec_datafusion_err!("Failed to parse input: {mode}. Valid options are disabled, summary, trace") )?; print_options .instrumented_registry @@ -165,7 +165,7 @@ impl Command { ("\\pset [NAME [VALUE]]", "set table output option\n(format)") } Self::ObjectStoreProfileMode(_) => ( - "\\object_store_profiling (disabled|enabled)", + "\\object_store_profiling (disabled|summary|trace)", "print or set object store profile mode", ), } @@ -312,13 +312,22 @@ mod tests { InstrumentedObjectStoreMode::default() ); - cmd = "object_store_profiling enabled" + cmd = "object_store_profiling summary" .parse() .expect("expected parse to succeed"); assert!(cmd.execute(&ctx, &mut print_options).await.is_ok()); assert_eq!( print_options.instrumented_registry.instrument_mode(), - InstrumentedObjectStoreMode::Enabled + InstrumentedObjectStoreMode::Summary + ); + + cmd = "object_store_profiling trace" + .parse() + .expect("expected parse to succeed"); + assert!(cmd.execute(&ctx, &mut print_options).await.is_ok()); + assert_eq!( + print_options.instrumented_registry.instrument_mode(), + InstrumentedObjectStoreMode::Trace ); cmd = "object_store_profiling does_not_exist" diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 3dbe839d3c9b..bdb2fdf5198e 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -151,7 +151,7 @@ struct Args { #[clap( long, - help = "Specify the default object_store_profiling mode, defaults to 'disabled'.\n[possible values: disabled, enabled]", + help = "Specify the default object_store_profiling mode, defaults to 'disabled'.\n[possible values: disabled, summary, trace]", default_value_t = InstrumentedObjectStoreMode::Disabled )] object_store_profiling: InstrumentedObjectStoreMode, diff --git a/datafusion-cli/src/object_storage/instrumented.rs b/datafusion-cli/src/object_storage/instrumented.rs index 9252e0688c35..cb96734f2464 100644 --- a/datafusion-cli/src/object_storage/instrumented.rs +++ b/datafusion-cli/src/object_storage/instrumented.rs @@ -48,8 +48,10 @@ pub enum InstrumentedObjectStoreMode { /// Disable collection of profiling data #[default] Disabled, - /// Enable collection of profiling data - Enabled, + /// Enable collection of profiling data and output a summary + Summary, + /// Enable collection of profiling data and output a summary and all details + Trace, } impl fmt::Display for InstrumentedObjectStoreMode { @@ -64,7 +66,8 @@ impl FromStr for InstrumentedObjectStoreMode { fn from_str(s: &str) -> std::result::Result { match s.to_lowercase().as_str() { "disabled" => Ok(Self::Disabled), - "enabled" => Ok(Self::Enabled), + "summary" => Ok(Self::Summary), + "trace" => Ok(Self::Trace), _ => Err(DataFusionError::Execution(format!("Unrecognized mode {s}"))), } } @@ -73,7 +76,8 @@ impl FromStr for InstrumentedObjectStoreMode { impl From for InstrumentedObjectStoreMode { fn from(value: u8) -> Self { match value { - 1 => InstrumentedObjectStoreMode::Enabled, + 1 => InstrumentedObjectStoreMode::Summary, + 2 => InstrumentedObjectStoreMode::Trace, _ => InstrumentedObjectStoreMode::Disabled, } } @@ -434,16 +438,21 @@ mod tests { InstrumentedObjectStoreMode::Disabled )); assert!(matches!( - "EnABlEd".parse().unwrap(), - InstrumentedObjectStoreMode::Enabled + "SUmMaRy".parse().unwrap(), + InstrumentedObjectStoreMode::Summary + )); + assert!(matches!( + "TRaCe".parse().unwrap(), + InstrumentedObjectStoreMode::Trace )); assert!("does_not_exist" .parse::() .is_err()); assert!(matches!(0.into(), InstrumentedObjectStoreMode::Disabled)); - assert!(matches!(1.into(), InstrumentedObjectStoreMode::Enabled)); - assert!(matches!(2.into(), InstrumentedObjectStoreMode::Disabled)); + assert!(matches!(1.into(), InstrumentedObjectStoreMode::Summary)); + assert!(matches!(2.into(), InstrumentedObjectStoreMode::Trace)); + assert!(matches!(3.into(), InstrumentedObjectStoreMode::Disabled)); } #[test] @@ -455,8 +464,8 @@ mod tests { InstrumentedObjectStoreMode::default() ); - reg = reg.with_profile_mode(InstrumentedObjectStoreMode::Enabled); - assert_eq!(reg.instrument_mode(), InstrumentedObjectStoreMode::Enabled); + reg = reg.with_profile_mode(InstrumentedObjectStoreMode::Trace); + assert_eq!(reg.instrument_mode(), InstrumentedObjectStoreMode::Trace); let store = object_store::memory::InMemory::new(); let url = "mem://test".parse().unwrap(); @@ -484,7 +493,7 @@ mod tests { let _ = instrumented.get(&path).await.unwrap(); assert!(instrumented.requests.lock().is_empty()); - instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Enabled); + instrumented.set_instrument_mode(InstrumentedObjectStoreMode::Trace); assert!(instrumented.requests.lock().is_empty()); let _ = instrumented.get(&path).await.unwrap(); assert_eq!(instrumented.requests.lock().len(), 1); diff --git a/datafusion-cli/src/print_options.rs b/datafusion-cli/src/print_options.rs index f54de189b4ef..01be736ca54d 100644 --- a/datafusion-cli/src/print_options.rs +++ b/datafusion-cli/src/print_options.rs @@ -188,20 +188,21 @@ impl PrintOptions { if !self.quiet { writeln!(writer, "{formatted_exec_details}")?; - if self.instrumented_registry.instrument_mode() - != InstrumentedObjectStoreMode::Disabled - { + let instrument_mode = self.instrumented_registry.instrument_mode(); + if instrument_mode != InstrumentedObjectStoreMode::Disabled { writeln!(writer, "{OBJECT_STORE_PROFILING_HEADER}")?; for store in self.instrumented_registry.stores() { let requests = store.take_requests(); if !requests.is_empty() { writeln!(writer, "{store}")?; - for req in requests.iter() { - writeln!(writer, "{req}")?; + if instrument_mode == InstrumentedObjectStoreMode::Trace { + for req in requests.iter() { + writeln!(writer, "{req}")?; + } + // Add an extra blank line to help visually organize the output + writeln!(writer)?; } - // Add an extra blank line to help visually organize the output - writeln!(writer)?; writeln!(writer, "Summaries:")?; let summaries = RequestSummary::summarize_by_operation(&requests); @@ -252,7 +253,7 @@ mod tests { print_output.clear(); print_options .instrumented_registry - .set_instrument_mode(InstrumentedObjectStoreMode::Enabled); + .set_instrument_mode(InstrumentedObjectStoreMode::Trace); print_options.write_output(&mut print_output, exec_out.clone())?; let out_str: String = print_output .clone() diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index a67924fef253..56620346ed0f 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -434,8 +434,11 @@ LOCATION 's3://data/cars.csv'; -- Initial query should not show any profiling as the object store is not instrumented yet SELECT * from CARS LIMIT 1; -\object_store_profiling enabled --- Query again to see the profiling output +\object_store_profiling trace +-- Query again to see the full profiling output +SELECT * from CARS LIMIT 1; +\object_store_profiling summary +-- Query again to see the summarized profiling output SELECT * from CARS LIMIT 1; \object_store_profiling disabled -- Final query should not show any profiling as we disabled it again diff --git a/datafusion-cli/tests/snapshots/object_store_profiling@s3_url_fallback.snap b/datafusion-cli/tests/snapshots/object_store_profiling@s3_url_fallback.snap index 50c6cc8eab99..5c91800676a4 100644 --- a/datafusion-cli/tests/snapshots/object_store_profiling@s3_url_fallback.snap +++ b/datafusion-cli/tests/snapshots/object_store_profiling@s3_url_fallback.snap @@ -8,7 +8,7 @@ info: AWS_ALLOW_HTTP: "true" AWS_ENDPOINT: "http://localhost:55031" AWS_SECRET_ACCESS_KEY: TEST-DataFusionPassword - stdin: "\n CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://data/cars.csv';\n\n-- Initial query should not show any profiling as the object store is not instrumented yet\nSELECT * from CARS LIMIT 1;\n\\object_store_profiling enabled\n-- Query again to see the profiling output\nSELECT * from CARS LIMIT 1;\n\\object_store_profiling disabled\n-- Final query should not show any profiling as we disabled it again\nSELECT * from CARS LIMIT 1;\n" + stdin: "\n CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://data/cars.csv';\n\n-- Initial query should not show any profiling as the object store is not instrumented yet\nSELECT * from CARS LIMIT 1;\n\\object_store_profiling trace\n-- Query again to see the full profiling output\nSELECT * from CARS LIMIT 1;\n\\object_store_profiling summary\n-- Query again to see the summarized profiling output\nSELECT * from CARS LIMIT 1;\n\\object_store_profiling disabled\n-- Final query should not show any profiling as we disabled it again\nSELECT * from CARS LIMIT 1;\n" snapshot_kind: text --- success: true @@ -26,7 +26,7 @@ exit_code: 0 1 row(s) fetched. [ELAPSED] -ObjectStore Profile mode set to Enabled +ObjectStore Profile mode set to Trace +-----+-------+---------------------+ | car | speed | time | +-----+-------+---------------------+ @@ -36,7 +36,7 @@ ObjectStore Profile mode set to Enabled [ELAPSED] Object Store Profiling -Instrumented Object Store: instrument_mode: Enabled, inner: AmazonS3(data) +Instrumented Object Store: instrument_mode: Trace, inner: AmazonS3(data) operation=Get duration=[DURATION] size=1006 path=cars.csv Summaries: @@ -50,6 +50,28 @@ size max: 1006 B size avg: 1006 B size sum: 1006 B +ObjectStore Profile mode set to Summary ++-----+-------+---------------------+ +| car | speed | time | ++-----+-------+---------------------+ +| red | 20.0 | 1996-04-12T12:05:03 | ++-----+-------+---------------------+ +1 row(s) fetched. +[ELAPSED] + +Object Store Profiling +Instrumented Object Store: instrument_mode: Summary, inner: AmazonS3(data) +Summaries: +Get +count: 1 +[SUMMARY_DURATION] +[SUMMARY_DURATION] +[SUMMARY_DURATION] +size min: 1006 B +size max: 1006 B +size avg: 1006 B +size sum: 1006 B + ObjectStore Profile mode set to Disabled +-----+-------+---------------------+ | car | speed | time | diff --git a/docs/source/user-guide/cli/usage.md b/docs/source/user-guide/cli/usage.md index 57a96c5d7900..29ed6b8183c2 100644 --- a/docs/source/user-guide/cli/usage.md +++ b/docs/source/user-guide/cli/usage.md @@ -65,7 +65,7 @@ OPTIONS: --object-store-profiling Specify the default object_store_profiling mode, defaults to 'disabled'. - [possible values: disabled, enabled] [default: Disabled] + [possible values: disabled, summary, trace] [default: Disabled] -p, --data-path Path to your data, default to current directory @@ -129,7 +129,7 @@ Available commands inside DataFusion CLI are: - Object Store Profiling Mode ```bash -> \object_store_profiling [disabled|enabled] +> \object_store_profiling [disabled|summary|trace] ``` ## Supported SQL