Skip to content

chore: improve query performance #1367

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 38 additions & 9 deletions src/handlers/http/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,12 @@ use http::StatusCode;
use itertools::Itertools;
use serde::{Deserialize, Serialize};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::collections::{HashMap, HashSet};
use std::pin::Pin;
use std::sync::Arc;
use std::time::Instant;
use tracing::error;
use tokio::task::JoinSet;
use tracing::{error, warn};

use crate::event::commit_schema;
use crate::metrics::QUERY_EXECUTE_TIME;
Expand Down Expand Up @@ -126,7 +127,7 @@ pub async fn query(req: HttpRequest, query_request: Query) -> Result<HttpRespons
{
Ok(raw_logical_plan) => raw_logical_plan,
Err(_) => {
create_streams_for_querier().await;
create_streams_for_querier().await?;
session_state
.create_logical_plan(&query_request.query)
.await?
Expand Down Expand Up @@ -433,17 +434,45 @@ pub async fn update_schema_when_distributed(tables: &Vec<String>) -> Result<(),
/// Create streams for the querier if they do not already exist.
/// Gets the list of streams from memory and from storage, then
/// creates in memory every stream that is present in storage but missing from memory.
pub async fn create_streams_for_querier() {
let querier_streams = PARSEABLE.streams.list();
pub async fn create_streams_for_querier() -> Result<(), QueryError> {
let store = PARSEABLE.storage.get_object_store();
let storage_streams = store.list_streams().await.unwrap();
for stream_name in storage_streams {
if !querier_streams.contains(&stream_name) {
let _ = PARSEABLE
let querier_streams = PARSEABLE.streams.list();

let querier_streams_set: HashSet<_> = querier_streams.into_iter().collect();

let storage_streams = store.list_streams().await?;

let missing_streams: Vec<_> = storage_streams
.into_iter()
.filter(|stream_name| !querier_streams_set.contains(stream_name))
.collect();

if missing_streams.is_empty() {
return Ok(());
}

let mut join_set = JoinSet::new();
for stream_name in missing_streams {
join_set.spawn(async move {
let result = PARSEABLE
.create_stream_and_schema_from_storage(&stream_name)
.await;

if let Err(e) = &result {
warn!("Failed to create stream '{}': {}", stream_name, e);
}

(stream_name, result)
});
}

while let Some(result) = join_set.join_next().await {
if let Err(join_error) = result {
warn!("Task join error: {}", join_error);
}
}

Ok(())
}

impl FromRequest for Query {
Expand Down
26 changes: 16 additions & 10 deletions src/parseable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ use once_cell::sync::Lazy;
pub use staging::StagingError;
use streams::StreamRef;
pub use streams::{Stream, StreamNotFound, Streams};
use tokio::try_join;
use tracing::error;

#[cfg(feature = "kafka")]
Expand Down Expand Up @@ -270,17 +271,22 @@ impl Parseable {
return Ok(false);
}

let mut stream_metadata = ObjectStoreFormat::default();
let stream_metadata_bytes = storage.create_stream_from_ingestor(stream_name).await?;
if !stream_metadata_bytes.is_empty() {
stream_metadata = serde_json::from_slice::<ObjectStoreFormat>(&stream_metadata_bytes)?;
}
let (stream_metadata_bytes, schema_bytes) = try_join!(
storage.create_stream_from_ingestor(stream_name),
storage.create_schema_from_ingestor(stream_name)
)?;

let mut schema = Arc::new(Schema::empty());
let schema_bytes = storage.create_schema_from_ingestor(stream_name).await?;
if !schema_bytes.is_empty() {
schema = serde_json::from_slice::<Arc<Schema>>(&schema_bytes)?;
}
let stream_metadata = if stream_metadata_bytes.is_empty() {
ObjectStoreFormat::default()
} else {
serde_json::from_slice::<ObjectStoreFormat>(&stream_metadata_bytes)?
};

let schema = if schema_bytes.is_empty() {
Arc::new(Schema::empty())
} else {
serde_json::from_slice::<Arc<Schema>>(&schema_bytes)?
};

let static_schema: HashMap<String, Arc<Field>> = schema
.fields
Expand Down
Loading