-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(ingestion_stream): improved stream developer experience
- Loading branch information
Showing
10 changed files
with
118 additions
and
93 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,44 +1,67 @@ | ||
#![allow(clippy::from_over_into)] | ||
//! This module defines the `IngestionStream` type, which is used for handling asynchronous streams of `IngestionNode` items in the ingestion pipeline. | ||
//! | ||
//! The `IngestionStream` type is a pinned, boxed, dynamically-dispatched stream that yields `Result<IngestionNode>` items. This type is essential for managing | ||
//! and processing large volumes of data asynchronously, ensuring efficient and scalable ingestion workflows. | ||
use anyhow::Result; | ||
use futures_util::stream::Stream; | ||
use futures_util::stream::{self, Stream}; | ||
use pin_project_lite::pin_project; | ||
use std::pin::Pin; | ||
|
||
use super::IngestionNode; | ||
|
||
/// A type alias for a pinned, boxed, dynamically-dispatched stream of `IngestionNode` items. | ||
/// | ||
/// This type is used in the ingestion pipeline to handle asynchronous streams of data. Each item in the stream is a `Result<IngestionNode>`, | ||
/// allowing for error handling during the ingestion process. The `Send` trait is implemented to ensure that the stream can be safely sent | ||
/// across threads, enabling concurrent processing. | ||
/// | ||
/// # Type Definition | ||
/// - `Pin<Box<dyn Stream<Item = Result<IngestionNode>> + Send>>` | ||
/// | ||
/// # Components | ||
/// - `Pin`: Ensures that the memory location of the stream is fixed, which is necessary for certain asynchronous operations. | ||
/// - `Box<dyn Stream<Item = Result<IngestionNode>>>`: A heap-allocated, dynamically-dispatched stream that yields `Result<IngestionNode>` items. | ||
/// - `Send`: Ensures that the stream can be sent across thread boundaries, facilitating concurrent processing. | ||
/// | ||
/// # Usage | ||
/// The `IngestionStream` type is typically used in the ingestion pipeline to process data asynchronously. It allows for efficient handling | ||
/// of large volumes of data by leveraging Rust's asynchronous capabilities. | ||
/// | ||
/// # Error Handling | ||
/// Each item in the stream is a `Result<IngestionNode>`, which means that errors can be propagated and handled during the ingestion process. | ||
/// This design allows for robust error handling and ensures that the ingestion pipeline can gracefully handle failures. | ||
/// | ||
/// # Performance Considerations | ||
/// The use of `Pin` and `Box` ensures that the stream's memory location is fixed and heap-allocated, respectively. This design choice is | ||
/// crucial for asynchronous operations that require stable memory addresses. Additionally, the `Send` trait enables concurrent processing, | ||
/// which can significantly improve performance in multi-threaded environments. | ||
/// | ||
/// # Edge Cases | ||
/// - The stream may yield errors (`Err` variants) instead of valid `IngestionNode` items. These errors should be handled appropriately | ||
/// to ensure the robustness of the ingestion pipeline. | ||
/// - The stream must be pinned to ensure that its memory location remains fixed, which is necessary for certain asynchronous operations. | ||
pub type IngestionStream = Pin<Box<dyn Stream<Item = Result<IngestionNode>> + Send>>; | ||
pub use futures_util::{StreamExt, TryStreamExt}; | ||
|
||
// We need to inform the compiler that `inner` is pinned as well | ||
pin_project! { | ||
/// An asynchronous stream of `IngestionNode` items. | ||
/// | ||
/// Wraps an internal stream of `Result<IngestionNode>` items. | ||
/// | ||
/// Streams, iterators and vectors of `Result<IngestionNode>` can be converted into an `IngestionStream`. | ||
pub struct IngestionStream { | ||
#[pin] | ||
inner: Pin<Box<dyn Stream<Item = Result<IngestionNode>> + Send>>, | ||
} | ||
} | ||
|
||
impl Stream for IngestionStream { | ||
type Item = Result<IngestionNode>; | ||
|
||
fn poll_next( | ||
self: Pin<&mut Self>, | ||
cx: &mut std::task::Context<'_>, | ||
) -> std::task::Poll<Option<Self::Item>> { | ||
let this = self.project(); | ||
this.inner.poll_next(cx) | ||
} | ||
} | ||
|
||
impl Into<IngestionStream> for Vec<Result<IngestionNode>> { | ||
fn into(self) -> IngestionStream { | ||
IngestionStream::iter(self) | ||
} | ||
} | ||
|
||
impl Into<IngestionStream> for Pin<Box<dyn Stream<Item = Result<IngestionNode>> + Send>> { | ||
fn into(self) -> IngestionStream { | ||
IngestionStream { inner: self } | ||
} | ||
} | ||
|
||
impl IngestionStream { | ||
pub fn empty() -> Self { | ||
IngestionStream { | ||
inner: stream::empty().boxed(), | ||
} | ||
} | ||
|
||
// NOTE: Can we really guarantee that the iterator will outlive the stream? | ||
pub fn iter<I>(iter: I) -> Self | ||
where | ||
I: IntoIterator<Item = Result<IngestionNode>> + Send + 'static, | ||
<I as IntoIterator>::IntoIter: Send, | ||
{ | ||
IngestionStream { | ||
inner: stream::iter(iter).boxed(), | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.