From df258d3449565b66b1b6c2247d3baae2a8432317 Mon Sep 17 00:00:00 2001 From: Carter Green Date: Tue, 11 Apr 2023 11:05:16 -0500 Subject: [PATCH 01/22] FIX: Fix DbnDecoder partial decoding --- Cargo.lock | 10 +++---- python/src/lib.rs | 67 +++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 64 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1a21655..33821eb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -188,7 +188,7 @@ dependencies = [ [[package]] name = "databento-dbn" -version = "0.4.2" +version = "0.4.3" dependencies = [ "dbn", "pyo3", @@ -197,7 +197,7 @@ dependencies = [ [[package]] name = "dbn" -version = "0.4.2" +version = "0.4.3" dependencies = [ "anyhow", "async-compression", @@ -215,7 +215,7 @@ dependencies = [ [[package]] name = "dbn-c" -version = "0.4.2" +version = "0.4.3" dependencies = [ "cbindgen", "dbn", @@ -224,7 +224,7 @@ dependencies = [ [[package]] name = "dbn-cli" -version = "0.4.2" +version = "0.4.3" dependencies = [ "anyhow", "assert_cmd", @@ -237,7 +237,7 @@ dependencies = [ [[package]] name = "dbn-macros" -version = "0.4.2" +version = "0.4.3" [[package]] name = "difflib" diff --git a/python/src/lib.rs b/python/src/lib.rs index 7578365..02afe81 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -70,7 +70,7 @@ impl DbnDecoder { fn decode(&mut self) -> PyResult> { let mut recs = Vec::new(); - let position = self.buffer.position(); + let orig_position = self.buffer.position(); self.buffer.set_position(0); if !self.has_decoded_metadata { match MetadataDecoder::new(&mut self.buffer).decode() { @@ -79,12 +79,13 @@ impl DbnDecoder { self.has_decoded_metadata = true; } Err(err) => { - self.buffer.set_position(position); + self.buffer.set_position(orig_position); // haven't read enough data for metadata return Err(to_val_err(err)); } } } + let mut read_position = self.buffer.position() as usize; let mut decoder = RecordDecoder::new(&mut self.buffer); Python::with_gil(|py| -> PyResult<()> { while let Some(rec) 
= decoder.decode_ref().map_err(to_val_err)? { @@ -120,25 +121,29 @@ impl DbnDecoder { return Err(to_val_err(format!("Invalid rtype {rtype} found in record"))) } }; + // keep track of position after last _successful_ decoding to ensure + // buffer is left in correct state in the case where one or more + // successful decodings is followed by a partial one, i.e. `decode_ref` + // returning `Ok(None)` + read_position = decoder.get_mut().position() as usize; } Ok(()) }) .map_err(|e| { - self.buffer.set_position(position); + self.buffer.set_position(orig_position); e })?; if recs.is_empty() { - self.buffer.set_position(position); + self.buffer.set_position(orig_position); } else { - self.shift_buffer(); + self.shift_buffer(read_position); } Ok(recs) } } impl DbnDecoder { - fn shift_buffer(&mut self) { - let read_position = self.buffer.position() as usize; + fn shift_buffer(&mut self, read_position: usize) { let inner_buf = self.buffer.get_mut(); let length = inner_buf.len(); let new_length = length - read_position; @@ -150,7 +155,7 @@ impl DbnDecoder { #[cfg(test)] mod tests { - use dbn::encode::EncodeDbn; + use dbn::{encode::EncodeDbn, enums::rtype::OHLCV_1S}; use pyo3::{py_run, types::PyString}; use ::dbn::{ @@ -209,6 +214,52 @@ mod tests { } } + #[test] + fn test_full_with_partial_record() { + setup(); + let mut decoder = DbnDecoder::new(); + let buffer = Vec::new(); + let mut encoder = Encoder::new( + buffer, + &MetadataBuilder::new() + .dataset("XNAS.ITCH".to_owned()) + .schema(Schema::Ohlcv1S) + .stype_in(SType::Native) + .stype_out(SType::ProductId) + .start(0) + .build(), + ) + .unwrap(); + decoder.write(encoder.get_ref().as_slice()).unwrap(); + let metadata_pos = encoder.get_ref().len() as usize; + assert!(matches!(decoder.decode(), Ok(recs) if recs.len() == 1)); + assert!(decoder.has_decoded_metadata); + let rec1 = ErrorMsg::new(1680708278000000000, "Python"); + let rec2 = OhlcvMsg { + hd: RecordHeader::new::(OHLCV_1S, 1, 1, 1681228173000000000), + open: 100, 
+ high: 200, + low: 50, + close: 150, + volume: 1000, + }; + encoder.encode_record(&rec1).unwrap(); + let rec1_pos = encoder.get_ref().len() as usize; + encoder.encode_record(&rec2).unwrap(); + assert!(decoder.buffer.get_ref().is_empty()); + // Write first record and part of second + decoder + .write(&encoder.get_ref()[metadata_pos..rec1_pos + 4]) + .unwrap(); + // Read first record + let res1 = decoder.decode(); + assert!(matches!(res1, Ok(recs) if recs.len() == 1)); + // Write rest of second record + decoder.write(&encoder.get_ref()[rec1_pos + 4..]).unwrap(); + let res2 = decoder.decode(); + assert!(matches!(res2, Ok(recs) if recs.len() == 1)); + } + #[test] fn test_dbn_decoder() { setup(); From ca4a7735458fb2809c89c4216448be92cb1a4872 Mon Sep 17 00:00:00 2001 From: Carter Green Date: Thu, 6 Apr 2023 13:50:12 -0500 Subject: [PATCH 02/22] ADD: Add RType enum for exhaustive matching --- CHANGELOG.md | 4 ++ python/src/lib.rs | 57 ++++++++---------- rust/dbn/src/encode/mod.rs | 2 + rust/dbn/src/enums.rs | 46 +++++++++++++++ rust/dbn/src/lib.rs | 1 + rust/dbn/src/macros.rs | 115 +++++++++++++++++++++++++++++++++++++ rust/dbn/src/python.rs | 11 +++- rust/dbn/src/record.rs | 23 +++++++- rust/dbn/src/record_ref.rs | 30 +++++++--- 9 files changed, 243 insertions(+), 46 deletions(-) create mode 100644 rust/dbn/src/macros.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a99271..9000d4b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 0.5.0 - TBD +- Added `RType` enum for exhaustive pattern matching +- Fixed issue with decoding partial records in Python `DbnDecoder` + ## 0.4.3 - 2023-04-07 - Fixed typo in Python type stubs diff --git a/python/src/lib.rs b/python/src/lib.rs index 02afe81..7344dda 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -5,12 +5,12 @@ use pyo3::{prelude::*, wrap_pyfunction, PyClass}; use dbn::{ decode::dbn::{MetadataDecoder, RecordDecoder}, - enums::rtype, python::to_val_err, record::{ - BidAskPair, ErrorMsg, 
ImbalanceMsg, InstrumentDefMsg, MboMsg, Mbp10Msg, Mbp1Msg, OhlcvMsg, - RecordHeader, StatusMsg, SymbolMappingMsg, SystemMsg, TradeMsg, + BidAskPair, ErrorMsg, HasRType, ImbalanceMsg, InstrumentDefMsg, MboMsg, Mbp10Msg, Mbp1Msg, + OhlcvMsg, RecordHeader, StatusMsg, SymbolMappingMsg, SystemMsg, TradeMsg, }, + rtype_ts_out_dispatch, }; /// A Python module wrapping dbn functions @@ -48,6 +48,7 @@ fn databento_dbn(_py: Python<'_>, m: &PyModule) -> PyResult<()> { struct DbnDecoder { buffer: io::Cursor>, has_decoded_metadata: bool, + ts_out: bool, } #[pymethods] @@ -57,6 +58,7 @@ impl DbnDecoder { Self { buffer: io::Cursor::default(), has_decoded_metadata: false, + ts_out: false, } } @@ -75,6 +77,7 @@ impl DbnDecoder { if !self.has_decoded_metadata { match MetadataDecoder::new(&mut self.buffer).decode() { Ok(metadata) => { + self.ts_out = metadata.ts_out; Python::with_gil(|py| recs.push(metadata.into_py(py))); self.has_decoded_metadata = true; } @@ -91,36 +94,24 @@ impl DbnDecoder { while let Some(rec) = decoder.decode_ref().map_err(to_val_err)? { // Bug in clippy generates an error here. 
trivial_copy feature isn't enabled, // but clippy thinks these records are `Copy` - #[allow(clippy::clone_on_copy)] - match rec.header().rtype { - rtype::MBP_0 => recs.push(rec.get::().unwrap().clone().into_py(py)), - rtype::MBP_1 => recs.push(rec.get::().unwrap().clone().into_py(py)), - rtype::MBP_10 => recs.push(rec.get::().unwrap().clone().into_py(py)), - #[allow(deprecated)] - rtype::OHLCV_DEPRECATED - | rtype::OHLCV_1S - | rtype::OHLCV_1M - | rtype::OHLCV_1H - | rtype::OHLCV_1D => { - recs.push(rec.get::().unwrap().clone().into_py(py)) - } - rtype::STATUS => recs.push(rec.get::().unwrap().clone().into_py(py)), - rtype::IMBALANCE => { - recs.push(rec.get::().unwrap().clone().into_py(py)) - } - rtype::INSTRUMENT_DEF => { - recs.push(rec.get::().unwrap().clone().into_py(py)) - } - rtype::ERROR => recs.push(rec.get::().unwrap().clone().into_py(py)), - rtype::SYMBOL_MAPPING => { - recs.push(rec.get::().unwrap().clone().into_py(py)) - } - rtype::SYSTEM => recs.push(rec.get::().unwrap().clone().into_py(py)), - rtype::MBO => recs.push(rec.get::().unwrap().clone().into_py(py)), - rtype => { - return Err(to_val_err(format!("Invalid rtype {rtype} found in record"))) - } - }; + fn push_rec>>( + rec: &R, + py: Python, + recs: &mut Vec>, + ) { + recs.push(rec.clone().into_py(py)) + } + + // Safety: It's safe to cast to `WithTsOut` because we're passing in the `ts_out` + // from the metadata header. + if unsafe { rtype_ts_out_dispatch!(rec, self.ts_out, push_rec, py, &mut recs) } + .is_err() + { + return Err(to_val_err(format!( + "Invalid rtype {} found in record", + rec.header().rtype, + ))); + } // keep track of position after last _successful_ decoding to ensure // buffer is left in correct state in the case where one or more // successful decodings is followed by a partial one, i.e. 
`decode_ref` diff --git a/rust/dbn/src/encode/mod.rs b/rust/dbn/src/encode/mod.rs index 0625984..a2b84bb 100644 --- a/rust/dbn/src/encode/mod.rs +++ b/rust/dbn/src/encode/mod.rs @@ -36,12 +36,14 @@ pub trait EncodeDbn { /// This function returns an error if it's unable to write to the underlying writer /// or there's a serialization error. fn encode_record(&mut self, record: &R) -> anyhow::Result; + /// Encode a slice of DBN records. /// /// # Errors /// This function returns an error if it's unable to write to the underlying writer /// or there's a serialization error. fn encode_records(&mut self, records: &[R]) -> anyhow::Result<()>; + /// Encode a stream of DBN records. /// /// # Errors diff --git a/rust/dbn/src/enums.rs b/rust/dbn/src/enums.rs index 44ac029..9dc7ff0 100644 --- a/rust/dbn/src/enums.rs +++ b/rust/dbn/src/enums.rs @@ -208,10 +208,56 @@ impl Display for SType { } } +pub use rtype::RType; + /// Record types, possible values for [`RecordHeader::rtype`][crate::record::RecordHeader::rtype] +#[allow(deprecated)] pub mod rtype { + use num_enum::TryFromPrimitive; + use serde::Serialize; + use super::Schema; + /// A type of record, i.e. a struct implementing [`HasRType`](crate::record::HasRType). + #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, TryFromPrimitive)] + #[repr(u8)] + pub enum RType { + /// Market by price with a book depth of 0 (used for trades). + Mbp0 = MBP_0, + /// Market by price with a book depth of 1 (also used for TBBO). + Mbp1 = MBP_1, + /// Market by price with a book depth of 10. + Mbp10 = MBP_10, + /// Open, high, low, close, and volume at an unspecified cadence. + #[deprecated( + since = "0.3.3", + note = "Separated into separate rtypes for each OHLCV schema." + )] + OhlcvDeprecated = OHLCV_DEPRECATED, + /// Open, high, low, close, and volume at a 1-second cadence. + Ohlcv1S = OHLCV_1S, + /// Open, high, low, close, and volume at a 1-minute cadence. 
+ Ohlcv1M = OHLCV_1M, + /// Open, high, low, close, and volume at an hourly cadence. + Ohlcv1H = OHLCV_1H, + /// Open, high, low, close, and volume at a daily cadence. + Ohlcv1D = OHLCV_1D, + /// Exchange status. + Status = STATUS, + /// Instrument definition. + InstrumentDef = INSTRUMENT_DEF, + /// Order imbalance. + Imbalance = IMBALANCE, + /// Error from gateway. + Error = ERROR, + /// Symbol mapping. + SymbolMapping = SYMBOL_MAPPING, + /// A non-error message. Also used for heartbeats. + System = SYSTEM, + /// Market by order. + Mbo = MBO, + } + /// Market by price with a book depth of 0 (used for trades). pub const MBP_0: u8 = 0x00; /// Market by price with a book depth of 1 (also used for TBBO). diff --git a/rust/dbn/src/lib.rs b/rust/dbn/src/lib.rs index 66e76e6..483a3ff 100644 --- a/rust/dbn/src/lib.rs +++ b/rust/dbn/src/lib.rs @@ -9,6 +9,7 @@ pub mod decode; pub mod encode; pub mod enums; pub mod error; +mod macros; pub mod metadata; pub mod record; pub mod record_ref; diff --git a/rust/dbn/src/macros.rs b/rust/dbn/src/macros.rs new file mode 100644 index 0000000..d1afdcc --- /dev/null +++ b/rust/dbn/src/macros.rs @@ -0,0 +1,115 @@ +/// Base macro for type dispatch based on rtype. +/// +/// # Errors +/// This macro returns an error if the rtype is not recognized. +#[doc(hidden)] +#[macro_export] +macro_rules! 
rtype_dispatch_base { + ($rec_ref:expr, $handler:ident) => {{ + // Introduced new scope so new `use`s are ok + use $crate::enums::RType; + use $crate::record::*; + match $rec_ref.rtype() { + Ok(rtype) => Ok(match rtype { + RType::Mbp0 => $handler!(TradeMsg), + RType::Mbp1 => $handler!(Mbp1Msg), + RType::Mbp10 => $handler!(Mbp10Msg), + #[allow(deprecated)] + RType::OhlcvDeprecated + | RType::Ohlcv1S + | RType::Ohlcv1M + | RType::Ohlcv1H + | RType::Ohlcv1D => $handler!(OhlcvMsg), + RType::Imbalance => $handler!(ImbalanceMsg), + RType::Status => $handler!(StatusMsg), + RType::InstrumentDef => $handler!(InstrumentDefMsg), + RType::Error => $handler!(ErrorMsg), + RType::SymbolMapping => $handler!(SymbolMappingMsg), + RType::System => $handler!(SystemMsg), + RType::Mbo => $handler!(MboMsg), + }), + Err(e) => Err(e), + } + }}; +} + +/// Specializes a generic function to all record types and dispatches based on the +/// `rtype` and `ts_out`. +/// +/// # Safety +/// When `$ts_out` is true, assumes `$rec_ref` contains a record with `ts_out` appended. If this is not the +/// case, reading the record will read beyond the end of the record. +/// +/// # Errors +/// This macro returns an error if the rtype is not recognized. +#[macro_export] +macro_rules! rtype_ts_out_dispatch { + ($rec_ref:expr, $ts_out:expr, $generic_fn:expr $(,$arg:expr)*) => {{ + macro_rules! maybe_ts_out { + ($r:ty) => {{ + if $ts_out { + $generic_fn($rec_ref.get_unchecked::>() $(, $arg)*) + } else { + $generic_fn(unsafe { $rec_ref.get_unchecked::<$r>() } $(, $arg)*) + } + }}; + } + $crate::rtype_dispatch_base!($rec_ref, maybe_ts_out) + }}; +} + +/// Specializes a generic function to all record types and dispatches based on `rtype`. +/// +/// # Errors +/// This macro returns an error if the rtype is not recognized. +#[macro_export] +macro_rules! rtype_dispatch { + ($rec_ref:expr, $generic_fn:expr $(,$arg:expr)*) => {{ + macro_rules! handler { + ($r:ty) => {{ + // Safety: checks rtype before converting. 
+ $generic_fn( unsafe { $rec_ref.get_unchecked::<$r>() } $(, $arg)*) + }} + } + $crate::rtype_dispatch_base!($rec_ref, handler) + }}; +} + +/// Specializes a generic async function to all record types and dispatches based on +/// `rtype`. +/// +/// # Errors +/// This macro returns an error if the rtype is not recognized. +#[macro_export] +macro_rules! rtype_async_dispatch { + ($rec_ref:expr, $generic_fn:expr $(,$arg:expr)*) => {{ + macro_rules! handler { + ($r:ty) => {{ + // Safety: checks rtype before converting. + $generic_fn( unsafe { $rec_ref.get_unchecked::<$r>() } $(, $arg)*).await + }} + } + $crate::rtype_dispatch_base!($rec_ref, handler) + }}; +} + +/// Specializes a generic function to all record types wrapped in +/// [`WithTsOut`](crate::record::WithTsOut) and dispatches based on the `rtype`. +/// +/// # Safety +/// Assumes `$rec_ref` contains a record with `ts_out` appended. If this is not the +/// case, reading the record will read beyond the end of the record. +/// +/// # Errors +/// This macro returns an error if the rtype is not recognized. +#[macro_export] +macro_rules! rtype_dispatch_with_ts_out { + ($rec_ref:expr, $generic_fn:expr $(,$arg:expr)*) => {{ + macro_rules! 
handler { + ($r:ty) => {{ + $generic_fn( $rec_ref.get_unchecked::>() $(, $arg)*) + }} + } + $crate::rtype_dispatch_base!($rec_ref, handler) + }; +}} diff --git a/rust/dbn/src/python.rs b/rust/dbn/src/python.rs index 6a59759..1185767 100644 --- a/rust/dbn/src/python.rs +++ b/rust/dbn/src/python.rs @@ -7,7 +7,7 @@ use std::{collections::HashMap, ffi::c_char, fmt, io, io::SeekFrom, num::NonZero use pyo3::{ exceptions::{PyTypeError, PyValueError}, prelude::*, - types::{PyBytes, PyDate, PyDateAccess, PyDict}, + types::{PyBytes, PyDate, PyDateAccess, PyDict, PyTuple}, PyClass, }; use time::Date; @@ -23,7 +23,7 @@ use crate::{ record::{ str_to_c_chars, BidAskPair, ErrorMsg, HasRType, ImbalanceMsg, InstrumentDefMsg, MboMsg, Mbp10Msg, Mbp1Msg, OhlcvMsg, RecordHeader, StatusMsg, SymbolMappingMsg, SystemMsg, TbboMsg, - TradeMsg, + TradeMsg, WithTsOut, }, UNDEF_ORDER_SIZE, UNDEF_PRICE, }; @@ -243,6 +243,13 @@ impl ToPyObject for SymbolMapping { } } +// `WithTsOut` is converted to a 2-tuple in Python +impl>> IntoPy for WithTsOut { + fn into_py(self, py: Python<'_>) -> PyObject { + PyTuple::new(py, [self.rec.into_py(py), self.ts_out.into_py(py)]).into_py(py) + } +} + fn extract_date(any: &PyAny) -> PyResult { let py_date = any.downcast::().map_err(PyErr::from)?; let month = diff --git a/rust/dbn/src/record.rs b/rust/dbn/src/record.rs index 34a1301..33b8c4b 100644 --- a/rust/dbn/src/record.rs +++ b/rust/dbn/src/record.rs @@ -8,8 +8,8 @@ use serde::Serialize; use crate::{ enums::{ - rtype, Action, InstrumentClass, MatchAlgorithm, SecurityUpdateAction, Side, - UserDefinedInstrument, + rtype::{self, RType}, + Action, InstrumentClass, MatchAlgorithm, SecurityUpdateAction, Side, UserDefinedInstrument, }, error::ConversionError, }; @@ -622,6 +622,16 @@ pub trait HasRType { fn record_size(&self) -> usize { self.header().record_size() } + + /// Tries to convert the raw `rtype` into an enum which is useful for exhaustive + /// pattern matching. 
+ /// + /// # Errors + /// This function returns an error if the `rtype` field does not + /// contain a valid, known [`RType`](crate::enums::RType). + fn rtype(&self) -> crate::error::Result { + self.header().rtype() + } } impl RecordHeader { @@ -649,6 +659,15 @@ impl RecordHeader { pub const fn record_size(&self) -> usize { self.length as usize * Self::LENGTH_MULTIPLIER } + + /// Tries to convert the raw `rtype` into an enum. + /// + /// # Errors + /// This function returns an error if the `rtype` field does not + /// contain a valid, known [`RType`](crate::enums::RType). + pub fn rtype(&self) -> crate::error::Result { + RType::try_from(self.rtype).map_err(|_| ConversionError::TypeConversion("Invalid rtype")) + } } impl MboMsg { diff --git a/rust/dbn/src/record_ref.rs b/rust/dbn/src/record_ref.rs index f292a45..78a20d8 100644 --- a/rust/dbn/src/record_ref.rs +++ b/rust/dbn/src/record_ref.rs @@ -2,7 +2,10 @@ use std::{marker::PhantomData, mem, ptr::NonNull}; -use crate::record::{HasRType, RecordHeader}; +use crate::{ + enums::RType, + record::{HasRType, RecordHeader}, +}; /// A wrapper around a non-owning immutable reference to a DBN record. This wrapper /// allows for mixing of record types and schemas, and runtime record polymorphism. @@ -50,6 +53,21 @@ impl<'a> RecordRef<'a> { T::has_rtype(self.header().rtype) } + /// Returns the size of the record in bytes. + pub fn record_size(&self) -> usize { + self.header().record_size() + } + + /// Tries to convert the raw `rtype` into an enum which is useful for exhaustive + /// pattern matching. + /// + /// # Errors + /// This function returns an error if the `rtype` field does not + /// contain a valid, known [`RType`](crate::enums::RType). + pub fn rtype(&self) -> crate::error::Result { + self.header().rtype() + } + /// Returns a reference to the underlying record of type `T` or `None` if it points /// to another record type. 
/// @@ -89,10 +107,6 @@ mod tests { use super::*; - unsafe fn as_mut_u8_slice(data: &mut T) -> &mut [u8] { - std::slice::from_raw_parts_mut(data as *mut T as *mut u8, mem::size_of::()) - } - const SOURCE_RECORD: MboMsg = MboMsg { hd: RecordHeader::new::(rtype::MBO, 1, 1, 0), order_id: 17, @@ -109,15 +123,13 @@ mod tests { #[test] fn test_header() { - let mut source = SOURCE_RECORD; - let target = unsafe { RecordRef::new(as_mut_u8_slice(&mut source)) }; + let target = unsafe { RecordRef::new(SOURCE_RECORD.as_ref()) }; assert_eq!(*target.header(), SOURCE_RECORD.hd); } #[test] fn test_has_and_get() { - let mut source = SOURCE_RECORD; - let target = unsafe { RecordRef::new(as_mut_u8_slice(&mut source)) }; + let target = unsafe { RecordRef::new(SOURCE_RECORD.as_ref()) }; assert!(!target.has::()); assert!(!target.has::()); assert!(!target.has::()); From 92868f03cebc309585298b99476fa19d9e04942a Mon Sep 17 00:00:00 2001 From: Carter Green Date: Tue, 11 Apr 2023 15:45:58 -0500 Subject: [PATCH 03/22] MOD: Always return tuples from `DbnDecoder.decode` --- CHANGELOG.md | 1 + python/src/lib.rs | 25 ++++++++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9000d4b..8555a64 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## 0.5.0 - TBD - Added `RType` enum for exhaustive pattern matching +- Changed `DbnDecoder.decode` to always return a list of tuples - Fixed issue with decoding partial records in Python `DbnDecoder` ## 0.4.3 - 2023-04-07 diff --git a/python/src/lib.rs b/python/src/lib.rs index 7344dda..b6a3095 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -1,7 +1,7 @@ //! Python bindings for the [`dbn`] crate using [`pyo3`]. 
use std::io::{self, Write}; -use pyo3::{prelude::*, wrap_pyfunction, PyClass}; +use pyo3::{prelude::*, types::PyTuple, wrap_pyfunction, PyClass}; use dbn::{ decode::dbn::{MetadataDecoder, RecordDecoder}, @@ -99,7 +99,20 @@ impl DbnDecoder { py: Python, recs: &mut Vec>, ) { - recs.push(rec.clone().into_py(py)) + let pyrec = rec.clone().into_py(py); + recs.push( + // Convert non `WithTsOut` records to a (rec, None) + // for consistent typing + if pyrec + .as_ref(py) + .is_instance_of::() + .unwrap_or_default() + { + pyrec + } else { + (pyrec, py.None()).into_py(py) + }, + ) } // Safety: It's safe to cast to `WithTsOut` because we're passing in the `ts_out` @@ -271,7 +284,13 @@ decoder = DbnDecoder() with open(path, 'rb') as fin: decoder.write(fin.read()) records = decoder.decode() -assert len(records) == 3"# +assert len(records) == 3 +metadata = records[0] +for _, ts_out in records[1:]: + if metadata.ts_out: + assert ts_out is not None + else: + assert ts_out is None"# ) }); } From 9651eeefe3b000691d040e5ed0f1887a0275c1ee Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Tue, 11 Apr 2023 17:09:37 -0700 Subject: [PATCH 04/22] FIX: Fix DbnDecoder yielding bare Metadata record --- python/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/src/lib.rs b/python/src/lib.rs index b6a3095..e40b29e 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -78,7 +78,7 @@ impl DbnDecoder { match MetadataDecoder::new(&mut self.buffer).decode() { Ok(metadata) => { self.ts_out = metadata.ts_out; - Python::with_gil(|py| recs.push(metadata.into_py(py))); + Python::with_gil(|py| recs.push((metadata, py.None()).into_py(py))); self.has_decoded_metadata = true; } Err(err) => { @@ -285,7 +285,7 @@ with open(path, 'rb') as fin: decoder.write(fin.read()) records = decoder.decode() assert len(records) == 3 -metadata = records[0] +metadata, _ = records[0] for _, ts_out in records[1:]: if metadata.ts_out: assert ts_out is not None From 
04db1095c36495356b6c3e2204935c91e93a95a8 Mon Sep 17 00:00:00 2001 From: Carter Green Date: Tue, 11 Apr 2023 17:31:48 -0500 Subject: [PATCH 05/22] ADD: Add string getters --- CHANGELOG.md | 1 + rust/dbn/src/metadata.rs | 8 +- rust/dbn/src/python.rs | 96 +++++++++++++++++ rust/dbn/src/record.rs | 223 +++++++++++++++++++++++++++++++++++---- 4 files changed, 302 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8555a64..11ab575 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## 0.5.0 - TBD - Added `RType` enum for exhaustive pattern matching +- Added `&str` getters for more `c_char` array record fields - Changed `DbnDecoder.decode` to always return a list of tuples - Fixed issue with decoding partial records in Python `DbnDecoder` diff --git a/rust/dbn/src/metadata.rs b/rust/dbn/src/metadata.rs index bad82bc..62e39fa 100644 --- a/rust/dbn/src/metadata.rs +++ b/rust/dbn/src/metadata.rs @@ -4,19 +4,19 @@ use std::num::NonZeroU64; use serde::Serialize; -use crate::enums::{SType, Schema}; -use crate::record::as_u8_slice; - // Dummy derive macro to get around `cfg_attr` incompatibility of several // of pyo3's attribute macros. See https://github.com/PyO3/pyo3/issues/780 #[cfg(not(feature = "python"))] pub use dbn_macros::MockPyo3; +use crate::enums::{SType, Schema}; +use crate::record::as_u8_slice; + /// Information about the data contained in a DBN file or stream. DBN requires the /// Metadata to be included at the start of the encoded data. #[derive(Debug, Clone, PartialEq, Eq, Serialize)] -#[cfg_attr(not(feature = "python"), derive(MockPyo3))] // bring `pyo3` attribute into scope #[cfg_attr(feature = "python", pyo3::pyclass(module = "databento_dbn"))] +#[cfg_attr(not(feature = "python"), derive(MockPyo3))] // bring `pyo3` attribute into scope pub struct Metadata { /// The DBN schema version number. Newly-encoded DBN files will use [`crate::DBN_VERSION`]. 
#[pyo3(get)] diff --git a/rust/dbn/src/python.rs b/rust/dbn/src/python.rs index 1185767..87df15b 100644 --- a/rust/dbn/src/python.rs +++ b/rust/dbn/src/python.rs @@ -733,6 +733,12 @@ impl StatusMsg { fn py_record_size(&self) -> usize { self.record_size() } + + #[getter] + #[pyo3(name = "group")] + fn py_group(&self) -> PyResult<&str> { + self.group().map_err(to_val_err) + } } #[pymethods] @@ -894,6 +900,72 @@ impl InstrumentDefMsg { fn py_record_size(&self) -> usize { self.record_size() } + + #[getter] + #[pyo3(name = "currency")] + fn py_currency(&self) -> PyResult<&str> { + self.currency().map_err(to_val_err) + } + + #[getter] + #[pyo3(name = "settl_currency")] + fn py_settl_currency(&self) -> PyResult<&str> { + self.settl_currency().map_err(to_val_err) + } + + #[getter] + #[pyo3(name = "secsubtype")] + fn py_secsubtype(&self) -> PyResult<&str> { + self.secsubtype().map_err(to_val_err) + } + + #[getter] + #[pyo3(name = "symbol")] + fn py_symbol(&self) -> PyResult<&str> { + self.symbol().map_err(to_val_err) + } + + #[getter] + #[pyo3(name = "exchange")] + fn py_exchange(&self) -> PyResult<&str> { + self.exchange().map_err(to_val_err) + } + + #[getter] + #[pyo3(name = "asset")] + fn py_asset(&self) -> PyResult<&str> { + self.asset().map_err(to_val_err) + } + + #[getter] + #[pyo3(name = "cfi")] + fn py_cfi(&self) -> PyResult<&str> { + self.cfi().map_err(to_val_err) + } + + #[getter] + #[pyo3(name = "security_type")] + fn py_security_type(&self) -> PyResult<&str> { + self.security_type().map_err(to_val_err) + } + + #[getter] + #[pyo3(name = "unit_of_measure")] + fn py_unit_of_measure(&self) -> PyResult<&str> { + self.unit_of_measure().map_err(to_val_err) + } + + #[getter] + #[pyo3(name = "underlying")] + fn py_underlying(&self) -> PyResult<&str> { + self.underlying().map_err(to_val_err) + } + + #[getter] + #[pyo3(name = "strike_price_currency")] + fn py_strike_price_currency(&self) -> PyResult<&str> { + self.strike_price_currency().map_err(to_val_err) + } } 
#[pymethods] @@ -972,6 +1044,12 @@ impl ErrorMsg { fn py_record_size(&self) -> usize { self.record_size() } + + #[getter] + #[pyo3(name = "err")] + fn py_err(&self) -> PyResult<&str> { + self.err().map_err(to_val_err) + } } #[pymethods] @@ -1013,6 +1091,18 @@ impl SymbolMappingMsg { fn py_record_size(&self) -> usize { self.record_size() } + + #[getter] + #[pyo3(name = "stype_in_symbol")] + fn py_stype_in_symbol(&self) -> PyResult<&str> { + self.stype_in_symbol().map_err(to_val_err) + } + + #[getter] + #[pyo3(name = "stype_out_symbol")] + fn py_stype_out_symbol(&self) -> PyResult<&str> { + self.stype_out_symbol().map_err(to_val_err) + } } #[pymethods] @@ -1034,6 +1124,12 @@ impl SystemMsg { fn py_record_size(&self) -> usize { self.record_size() } + + #[getter] + #[pyo3(name = "msg")] + fn py_msg(&self) -> PyResult<&str> { + self.msg().map_err(to_val_err) + } } #[cfg(test)] diff --git a/rust/dbn/src/record.rs b/rust/dbn/src/record.rs index 33b8c4b..fbd68bb 100644 --- a/rust/dbn/src/record.rs +++ b/rust/dbn/src/record.rs @@ -6,6 +6,11 @@ use std::{ffi::CStr, mem, os::raw::c_char, ptr::NonNull, slice, str::Utf8Error}; use anyhow::anyhow; use serde::Serialize; +// Dummy derive macro to get around `cfg_attr` incompatibility of several +// of pyo3's attribute macros. See https://github.com/PyO3/pyo3/issues/780 +#[cfg(not(feature = "python"))] +pub use dbn_macros::MockPyo3; + use crate::{ enums::{ rtype::{self, RType}, @@ -263,21 +268,24 @@ pub struct OhlcvMsg { #[repr(C)] #[derive(Clone, Debug, PartialEq, Eq, Serialize)] #[cfg_attr(feature = "trivial_copy", derive(Copy))] -#[cfg_attr( - feature = "python", - pyo3::pyclass(get_all, set_all, module = "databento_dbn") -)] +#[cfg_attr(feature = "python", pyo3::pyclass(module = "databento_dbn"))] +#[cfg_attr(not(feature = "python"), derive(MockPyo3))] // bring `pyo3` attribute into scope pub struct StatusMsg { /// The common header. 
+ #[pyo3(get, set)] pub hd: RecordHeader, /// The capture-server-received timestamp expressed as number of nanoseconds since /// the UNIX epoch. #[serde(serialize_with = "serialize_large_u64")] + #[pyo3(get, set)] pub ts_recv: u64, #[serde(serialize_with = "serialize_c_char_arr")] pub group: [c_char; 21], + #[pyo3(get, set)] pub trading_status: u8, + #[pyo3(get, set)] pub halt_reason: u8, + #[pyo3(get, set)] pub trading_event: u8, } @@ -286,94 +294,125 @@ pub struct StatusMsg { #[repr(C)] #[derive(Clone, Debug, PartialEq, Eq, Serialize)] #[cfg_attr(feature = "trivial_copy", derive(Copy))] -#[cfg_attr( - feature = "python", - pyo3::pyclass(get_all, set_all, module = "databento_dbn") -)] +#[cfg_attr(feature = "python", pyo3::pyclass(module = "databento_dbn"))] +#[cfg_attr(not(feature = "python"), derive(MockPyo3))] // bring `pyo3` attribute into scope pub struct InstrumentDefMsg { /// The common header. + #[pyo3(get, set)] pub hd: RecordHeader, /// The capture-server-received timestamp expressed as number of nanoseconds since the /// UNIX epoch. #[serde(serialize_with = "serialize_large_u64")] + #[pyo3(get, set)] pub ts_recv: u64, /// The minimum constant tick for the instrument in units of 1e-9, i.e. /// 1/1,000,000,000 or 0.000000001. + #[pyo3(get, set)] pub min_price_increment: i64, /// The multiplier to convert the venue’s display price to the conventional price. + #[pyo3(get, set)] pub display_factor: i64, /// The last eligible trade time expressed as a number of nanoseconds since the /// UNIX epoch. #[serde(serialize_with = "serialize_large_u64")] + #[pyo3(get, set)] pub expiration: u64, /// The time of instrument activation expressed as a number of nanoseconds since the /// UNIX epoch. #[serde(serialize_with = "serialize_large_u64")] + #[pyo3(get, set)] pub activation: u64, /// The allowable high limit price for the trading day in units of 1e-9, i.e. /// 1/1,000,000,000 or 0.000000001. 
+ #[pyo3(get, set)] pub high_limit_price: i64, /// The allowable low limit price for the trading day in units of 1e-9, i.e. /// 1/1,000,000,000 or 0.000000001. + #[pyo3(get, set)] pub low_limit_price: i64, /// The differential value for price banding in units of 1e-9, i.e. 1/1,000,000,000 /// or 0.000000001. + #[pyo3(get, set)] pub max_price_variation: i64, /// The trading session settlement price on `trading_reference_date`. + #[pyo3(get, set)] pub trading_reference_price: i64, /// The contract size for each instrument, in combination with `unit_of_measure`. + #[pyo3(get, set)] pub unit_of_measure_qty: i64, /// The value currently under development by the venue. Converted to units of 1e-9, i.e. /// 1/1,000,000,000 or 0.000000001. + #[pyo3(get, set)] pub min_price_increment_amount: i64, /// The value used for price calculation in spread and leg pricing in units of 1e-9, /// i.e. 1/1,000,000,000 or 0.000000001. + #[pyo3(get, set)] pub price_ratio: i64, /// A bitmap of instrument eligibility attributes. + #[pyo3(get, set)] pub inst_attrib_value: i32, /// The `product_id` of the first underlying instrument. + #[pyo3(get, set)] pub underlying_id: u32, /// The total cleared volume of the instrument traded during the prior trading session. + #[pyo3(get, set)] pub cleared_volume: i32, /// The implied book depth on the price level data feed. + #[pyo3(get, set)] pub market_depth_implied: i32, /// The (outright) book depth on the price level data feed. + #[pyo3(get, set)] pub market_depth: i32, /// The market segment of the instrument. + #[pyo3(get, set)] pub market_segment_id: u32, /// The maximum trading volume for the instrument. + #[pyo3(get, set)] pub max_trade_vol: u32, /// The minimum order entry quantity for the instrument. + #[pyo3(get, set)] pub min_lot_size: i32, /// The minimum quantity required for a block trade of the instrument. + #[pyo3(get, set)] pub min_lot_size_block: i32, /// The minimum quantity required for a round lot of the instrument. 
Multiples of /// this quantity are also round lots. + #[pyo3(get, set)] pub min_lot_size_round_lot: i32, /// The minimum trading volume for the instrument. + #[pyo3(get, set)] pub min_trade_vol: u32, /// The total open interest for the market at the close of the prior trading session. + #[pyo3(get, set)] pub open_interest_qty: i32, /// The number of deliverables per instrument, i.e. peak days. + #[pyo3(get, set)] pub contract_multiplier: i32, /// The quantity that a contract will decay daily, after `decay_start_date` has /// been reached. + #[pyo3(get, set)] pub decay_quantity: i32, /// The fixed contract value assigned to each instrument. + #[pyo3(get, set)] pub original_contract_size: i32, #[doc(hidden)] #[serde(skip)] pub reserved1: [c_char; 4], /// The trading session date corresponding to the settlement price in /// `trading_reference_price`, in number of days since the UNIX epoch. + #[pyo3(get, set)] pub trading_reference_date: u16, /// The channel ID assigned at the venue. + #[pyo3(get, set)] pub appl_id: i16, /// The calendar year reflected in the instrument symbol. + #[pyo3(get, set)] pub maturity_year: u16, /// The date at which a contract will begin to decay. + #[pyo3(get, set)] pub decay_start_date: u16, /// The channel ID assigned by Databento as an incrementing integer starting at zero. + #[pyo3(get, set)] pub channel_id: u16, /// The currency used for price fields. #[serde(serialize_with = "serialize_c_char_arr")] @@ -413,46 +452,63 @@ pub struct InstrumentDefMsg { pub strike_price_currency: [c_char; 4], /// The classification of the instrument. #[serde(serialize_with = "serialize_c_char")] + #[pyo3(get, set)] pub instrument_class: c_char, #[doc(hidden)] #[serde(skip)] pub reserved2: [c_char; 2], /// The strike price of the option. Converted to units of 1e-9, i.e. 1/1,000,000,000 /// or 0.000000001. 
+ #[pyo3(get, set)] pub strike_price: i64, #[doc(hidden)] #[serde(skip)] pub reserved3: [c_char; 6], /// The matching algorithm used for the instrument, typically **F**IFO. #[serde(serialize_with = "serialize_c_char")] + #[pyo3(get, set)] pub match_algorithm: c_char, /// The current trading state of the instrument. + #[pyo3(get, set)] pub md_security_trading_status: u8, /// The price denominator of the main fraction. + #[pyo3(get, set)] pub main_fraction: u8, /// The number of digits to the right of the tick mark, to display fractional prices. + #[pyo3(get, set)] pub price_display_format: u8, /// The type indicators for the settlement price, as a bitmap. + #[pyo3(get, set)] pub settl_price_type: u8, /// The price denominator of the sub fraction. + #[pyo3(get, set)] pub sub_fraction: u8, /// The product complex of the instrument. + #[pyo3(get, set)] pub underlying_product: u8, /// Indicates if the instrument definition has been added, modified, or deleted. + #[pyo3(get, set)] pub security_update_action: SecurityUpdateAction, /// The calendar month reflected in the instrument symbol. + #[pyo3(get, set)] pub maturity_month: u8, /// The calendar day reflected in the instrument symbol, or 0. + #[pyo3(get, set)] pub maturity_day: u8, /// The calendar week reflected in the instrument symbol, or 0. + #[pyo3(get, set)] pub maturity_week: u8, /// Indicates if the instrument is user defined: **Y**es or **N**o. + #[pyo3(get, set)] pub user_defined_instrument: UserDefinedInstrument, /// The type of `contract_multiplier`. Either `1` for hours, or `2` for days. + #[pyo3(get, set)] pub contract_multiplier_unit: i8, /// The schedule for delivering electricity. + #[pyo3(get, set)] pub flow_schedule_type: i8, /// The tick rule of the spread. + #[pyo3(get, set)] pub tick_rule: u8, // Filler for alignment. 
#[serde(skip)] @@ -529,12 +585,11 @@ pub struct ImbalanceMsg { #[repr(C)] #[derive(Clone, Debug, PartialEq, Eq, Serialize)] #[cfg_attr(feature = "trivial_copy", derive(Copy))] -#[cfg_attr( - feature = "python", - pyo3::pyclass(get_all, set_all, module = "databento_dbn") -)] +#[cfg_attr(feature = "python", pyo3::pyclass(module = "databento_dbn"))] +#[cfg_attr(not(feature = "python"), derive(MockPyo3))] // bring `pyo3` attribute into scope pub struct ErrorMsg { /// The common header. + #[pyo3(get, set)] pub hd: RecordHeader, /// The error message. #[serde(serialize_with = "serialize_c_char_arr")] @@ -546,12 +601,11 @@ pub struct ErrorMsg { #[repr(C)] #[derive(Clone, Debug, PartialEq, Eq, Serialize)] #[cfg_attr(feature = "trivial_copy", derive(Copy))] -#[cfg_attr( - feature = "python", - pyo3::pyclass(get_all, set_all, module = "databento_dbn") -)] +#[cfg_attr(feature = "python", pyo3::pyclass(module = "databento_dbn"))] +#[cfg_attr(not(feature = "python"), derive(MockPyo3))] // bring `pyo3` attribute into scope pub struct SymbolMappingMsg { /// The common header. + #[pyo3(get, set)] pub hd: RecordHeader, /// The input symbol. #[serde(serialize_with = "serialize_c_char_arr")] @@ -565,9 +619,11 @@ pub struct SymbolMappingMsg { pub _dummy: [c_char; 4], /// The start of the mapping interval expressed as the number of nanoseconds since /// the UNIX epoch. + #[pyo3(get, set)] pub start_ts: u64, /// The end of the mapping interval expressed as the number of nanoseconds since /// the UNIX epoch. 
+ #[pyo3(get, set)] pub end_ts: u64, } @@ -576,12 +632,11 @@ pub struct SymbolMappingMsg { #[repr(C)] #[derive(Clone, Debug, PartialEq, Eq, Serialize)] #[cfg_attr(feature = "trivial_copy", derive(Copy))] -#[cfg_attr( - feature = "python", - pyo3::pyclass(get_all, set_all, module = "databento_dbn") -)] +#[cfg_attr(feature = "python", pyo3::pyclass(module = "databento_dbn"))] +#[cfg_attr(not(feature = "python"), derive(MockPyo3))] // bring `pyo3` attribute into scope pub struct SystemMsg { /// The common header. + #[pyo3(get, set)] pub hd: RecordHeader, /// The message from the Databento Live Subscription Gateway (LSG). #[serde(serialize_with = "serialize_c_char_arr")] @@ -754,7 +809,113 @@ impl Mbp10Msg { } } +impl StatusMsg { + /// Returns `group` as a `&str`. + /// + /// # Errors + /// This function returns an error if `group` contains invalid UTF-8. + pub fn group(&self) -> Result<&str, Utf8Error> { + c_chars_to_str(&self.group) + } +} + impl InstrumentDefMsg { + /// Returns `currency` as a `&str`. + /// + /// # Errors + /// This function returns an error if `currency` contains invalid UTF-8. + pub fn currency(&self) -> Result<&str, Utf8Error> { + c_chars_to_str(&self.currency) + } + + /// Returns `settl_currency` as a `&str`. + /// + /// # Errors + /// This function returns an error if `settl_currency` contains invalid UTF-8. + pub fn settl_currency(&self) -> Result<&str, Utf8Error> { + c_chars_to_str(&self.settl_currency) + } + + /// Returns `secsubtype` as a `&str`. + /// + /// # Errors + /// This function returns an error if `secsubtype` contains invalid UTF-8. + pub fn secsubtype(&self) -> Result<&str, Utf8Error> { + c_chars_to_str(&self.secsubtype) + } + + /// Returns `symbol` as a `&str`. + /// + /// # Errors + /// This function returns an error if `symbol` contains invalid UTF-8. + pub fn symbol(&self) -> Result<&str, Utf8Error> { + c_chars_to_str(&self.symbol) + } + + /// Returns `exchange` as a `&str`. 
+ /// + /// # Errors + /// This function returns an error if `exchange` contains invalid UTF-8. + pub fn exchange(&self) -> Result<&str, Utf8Error> { + c_chars_to_str(&self.exchange) + } + + /// Returns `asset` as a `&str`. + /// + /// # Errors + /// This function returns an error if `asset` contains invalid UTF-8. + pub fn asset(&self) -> Result<&str, Utf8Error> { + c_chars_to_str(&self.asset) + } + + /// Returns `cfi` as a `&str`. + /// + /// # Errors + /// This function returns an error if `cfi` contains invalid UTF-8. + pub fn cfi(&self) -> Result<&str, Utf8Error> { + c_chars_to_str(&self.cfi) + } + + /// Returns `security_type` as a `&str`. + /// + /// # Errors + /// This function returns an error if `security_type` contains invalid UTF-8. + pub fn security_type(&self) -> Result<&str, Utf8Error> { + c_chars_to_str(&self.security_type) + } + + /// Returns `unit_of_measure` as a `&str`. + /// + /// # Errors + /// This function returns an error if `unit_of_measure` contains invalid UTF-8. + pub fn unit_of_measure(&self) -> Result<&str, Utf8Error> { + c_chars_to_str(&self.unit_of_measure) + } + + /// Returns `underlying` as a `&str`. + /// + /// # Errors + /// This function returns an error if `underlying` contains invalid UTF-8. + pub fn underlying(&self) -> Result<&str, Utf8Error> { + c_chars_to_str(&self.underlying) + } + + /// Returns `strike_price_currency` as a `&str`. + /// + /// # Errors + /// This function returns an error if `strike_price_currency` contains invalid UTF-8. + pub fn strike_price_currency(&self) -> Result<&str, Utf8Error> { + c_chars_to_str(&self.strike_price_currency) + } + + /// Returns `group` as a `&str`. + /// + /// # Errors + /// This function returns an error if `group` contains invalid UTF-8. + pub fn group(&self) -> Result<&str, Utf8Error> { + c_chars_to_str(&self.group) + } + /// Tries to convert the raw `instrument_class` to an enum. /// /// # Errors @@ -793,7 +954,7 @@ impl ErrorMsg { error } - /// Returns `err` as a `str`. 
+ /// Returns `err` as a `&str`. /// /// # Errors /// This function returns an error if `err` contains invalid UTF-8. @@ -803,6 +964,24 @@ impl ErrorMsg { } } +impl SymbolMappingMsg { + /// Returns `stype_in_symbol` as a `&str`. + /// + /// # Errors + /// This function returns an error if `stype_in_symbol` contains invalid UTF-8. + pub fn stype_in_symbol(&self) -> Result<&str, Utf8Error> { + c_chars_to_str(&self.stype_in_symbol) + } + + /// Returns `stype_out_symbol` as a `&str`. + /// + /// # Errors + /// This function returns an error if `stype_out_symbol` contains invalid UTF-8. + pub fn stype_out_symbol(&self) -> Result<&str, Utf8Error> { + c_chars_to_str(&self.stype_out_symbol) + } +} + impl SystemMsg { const HEARTBEAT: &str = "Heartbeat"; @@ -832,7 +1011,7 @@ impl SystemMsg { .unwrap_or_default() } - /// Returns `msg` as a `str`. + /// Returns `msg` as a `&str`. /// /// # Errors /// This function returns an error if `msg` contains invalid UTF-8. From 1dbf6310f31551bb09e58a7cca35fd11a1463665 Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Tue, 11 Apr 2023 14:53:22 -0700 Subject: [PATCH 06/22] MOD: Python type stub improvements --- CHANGELOG.md | 1 + python/databento_dbn.pyi | 278 ++++++++++++++++++++++----------------- 2 files changed, 155 insertions(+), 124 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11ab575..1eba87a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ - Added `&str` getters for more `c_char` array record fields - Changed `DbnDecoder.decode` to always return a list of tuples - Fixed issue with decoding partial records in Python `DbnDecoder` +- Fixed missing type hint for Metadata bytes support ## 0.4.3 - 2023-04-07 - Fixed typo in Python type stubs diff --git a/python/databento_dbn.pyi b/python/databento_dbn.pyi index d28d1ad..0f85722 100644 --- a/python/databento_dbn.pyi +++ b/python/databento_dbn.pyi @@ -1,10 +1,34 @@ -"""Type stubs for databento_dbn""" -from typing import BinaryIO, Optional, Sequence, 
SupportsBytes - -class Metadata: +"""Type stubs for databento_dbn.""" +from typing import ( + Any, + BinaryIO, + Dict, + List, + Optional, + Sequence, + SupportsBytes, + Tuple, + Union, +) + +_DBNRecord = Union[ + Metadata, + MBOMsg, + MBP1Msg, + MBP10Msg, + OHLCVMsg, + TradeMsg, + InstrumentDefMsg, + ImbalanceMsg, + ErrorMsg, + SymbolMappingMsg, + SystemMsg, +] + +class Metadata(SupportsBytes): """ - Information about the data contained in a DBN file or stream. DBN requires the - Metadata to be included at the start of the encoded data. + Information about the data contained in a DBN file or stream. DBN requires + the Metadata to be included at the start of the encoded data. See Also --------- @@ -13,10 +37,11 @@ class Metadata: """ + def __bytes__(self) -> bytes: ... @property def version(self) -> int: """ - The DBN schema version number. Newly-encoded DBN files will use [`crate::DBN_VERSION`]. + The DBN schema version number. Returns ------- @@ -36,7 +61,8 @@ class Metadata: @property def schema(self) -> int: """ - The data record schema. Specifies which record type is stored in the Zstd-compressed DBN file. + The data record schema. Specifies which record type is stored in the + Zstd-compressed DBN file. Returns ------- @@ -46,7 +72,8 @@ class Metadata: @property def start(self) -> int: """ - The UNIX nanosecond timestamp of the query start, or the first record if the file was split. + The UNIX nanosecond timestamp of the query start, or the first record + if the file was split. Returns ------- @@ -56,7 +83,8 @@ class Metadata: @property def end(self) -> int: """ - The UNIX nanosecond timestamp of the query end, or the last record if the file was split. + The UNIX nanosecond timestamp of the query end, or the last record if + the file was split. Returns ------- @@ -96,8 +124,8 @@ class Metadata: @property def ts_out(self) -> bool: """ - `true` if this store contains live data with send timestamps appended to each - record. 
+ `true` if this store contains live data with send timestamps appended + to each record. Returns ------- @@ -117,7 +145,8 @@ class Metadata: @property def partial(self) -> Sequence[str]: """ - Symbols that did not resolve for _at least one day_ in the query time range. + Symbols that did not resolve for at least one day in the query time + range. Returns ------- @@ -127,7 +156,7 @@ class Metadata: @property def not_found(self) -> Sequence[str]: """ - Symbols that did not resolve for _any_ day in the query time range. + Symbols that did not resolve for any day in the query time range. Returns ------- @@ -135,20 +164,18 @@ class Metadata: """ @property - def mappings(self) -> Sequence[SymbolMapping]: + def mappings(self) -> Dict[str, List[Dict[str, Any]]]: """ Symbol mappings containing a native symbol and its mapping intervals. Returns ------- - Sequence[SymbolMapping] + Dict[str, List[Dict[str, Any]]]: """ class RecordHeader: - """ - DBN Record Header - """ + """DBN Record Header.""" @property def length(self) -> int: @@ -193,7 +220,8 @@ class RecordHeader: @property def ts_event(self) -> int: """ - The matching-engine-received timestamp expressed as number of nanoseconds since the UNIX epoch. + The matching-engine-received timestamp expressed as number of + nanoseconds since the UNIX epoch. Returns ------- @@ -202,9 +230,7 @@ class RecordHeader: """ class Record(SupportsBytes): - """ - Base class for DBN records. - """ + """Base class for DBN records.""" def __bytes__(self) -> bytes: ... @property @@ -228,9 +254,7 @@ class Record(SupportsBytes): """ class _MBOBase: - """ - Base for market-by-order messages. - """ + """Base for market-by-order messages.""" @property def order_id(self) -> int: @@ -286,11 +310,13 @@ class _MBOBase: @property def action(self) -> str: """ - The event action. Can be `A`dd, `C`ancel, `M`odify, clea`R`, or `T`rade. + The event action. Can be `A`dd, `C`ancel, `M`odify, clea`R`, or + `T`rade. 
Returns ------- str + """ @property def side(self) -> str: @@ -305,12 +331,13 @@ class _MBOBase: @property def ts_recv(self) -> int: """ - The capture-server-received timestamp expressed as number of nanoseconds since - the UNIX epoch. + The capture-server-received timestamp expressed as number of + nanoseconds since the UNIX epoch. Returns ------- int + """ @property def ts_in_delta(self) -> int: @@ -334,14 +361,10 @@ class _MBOBase: """ class MBOMsg(Record, _MBOBase): - """ - A market-by-order (MBO) tick message. - """ + """A market-by-order (MBO) tick message.""" class BidAskPair: - """ - A book level. - """ + """A book level.""" @property def bid_px(self) -> int: @@ -405,9 +428,7 @@ class BidAskPair: """ class _MBPBase: - """ - Base for market-by-price messages. - """ + """Base for market-by-price messages.""" @property def price(self) -> int: @@ -433,7 +454,8 @@ class _MBPBase: @property def action(self) -> str: """ - The event action. Can be `A`dd, `C`ancel, `M`odify, clea`R`, or `T`rade. + The event action. Can be `A`dd, `C`ancel, `M`odify, clea`R`, or + `T`rade. Returns ------- @@ -473,7 +495,8 @@ class _MBPBase: @property def ts_recv(self) -> int: """ - The capture-server-received timestamp expressed as number of nanoseconds since the UNIX epoch. + The capture-server-received timestamp expressed as number of + nanoseconds since the UNIX epoch. Returns ------- @@ -503,14 +526,14 @@ class _MBPBase: class TradeMsg(Record, _MBPBase): """ - Market by price implementation with a book depth of 0. Equivalent to - MBP-0. The record of the `Trades` schema. + Market by price implementation with a book depth of 0. + + Equivalent to MBP-0. The record of the `Trades` schema. + """ class MBP1Msg(Record, _MBPBase): - """ - Market by price implementation with a known book depth of 1. 
- """ + """Market by price implementation with a known book depth of 1.""" @property def booklevel(self) -> Sequence[BidAskPair]: @@ -528,9 +551,7 @@ class MBP1Msg(Record, _MBPBase): """ class MBP10Msg(Record, _MBPBase): - """ - Market by price implementation with a known book depth of 10. - """ + """Market by price implementation with a known book depth of 10.""" @property def booklevel(self) -> Sequence[BidAskPair]: @@ -548,9 +569,7 @@ class MBP10Msg(Record, _MBPBase): """ class OHLCVMsg(Record): - """ - Open, high, low, close, and volume message. - """ + """Open, high, low, close, and volume message.""" @property def open(self) -> int: @@ -604,15 +623,13 @@ class OHLCVMsg(Record): """ class InstrumentDefMsg(Record): - """ - Definition of an instrument. - """ + """Definition of an instrument.""" @property def ts_recv(self) -> int: """ - The capture-server-received timestamp expressed as number of nanoseconds since the - UNIX epoch. + The capture-server-received timestamp expressed as number of + nanoseconds since the UNIX epoch. Returns ------- @@ -633,7 +650,8 @@ class InstrumentDefMsg(Record): @property def display_factor(self) -> int: """ - The multiplier to convert the venue’s display price to the conventional price. + The multiplier to convert the venue’s display price to the conventional + price. Returns ------- @@ -643,8 +661,8 @@ class InstrumentDefMsg(Record): @property def expiration(self) -> int: """ - The last eligible trade time expressed as a number of nanoseconds since the - UNIX epoch. + The last eligible trade time expressed as a number of nanoseconds since + the UNIX epoch. Returns ------- @@ -654,8 +672,8 @@ class InstrumentDefMsg(Record): @property def activation(self) -> int: """ - The time of instrument activation expressed as a number of nanoseconds since the - UNIX epoch. + The time of instrument activation expressed as a number of nanoseconds + since the UNIX epoch. 
Returns ------- @@ -665,8 +683,8 @@ class InstrumentDefMsg(Record): @property def high_limit_price(self) -> int: """ - The allowable high limit price for the trading day in units of 1e-9, i.e. - 1/1,000,000,000 or 0.000000001. + The allowable high limit price for the trading day in units of 1e-9, + i.e. 1/1,000,000,000 or 0.000000001. Returns ------- @@ -676,8 +694,8 @@ class InstrumentDefMsg(Record): @property def low_limit_price(self) -> int: """ - The allowable low limit price for the trading day in units of 1e-9, i.e. - 1/1,000,000,000 or 0.000000001. + The allowable low limit price for the trading day in units of 1e-9, + i.e. 1/1,000,000,000 or 0.000000001. Returns ------- @@ -687,8 +705,8 @@ class InstrumentDefMsg(Record): @property def max_price_variation(self) -> int: """ - The differential value for price banding in units of 1e-9, i.e. 1/1,000,000,000 - or 0.000000001. + The differential value for price banding in units of 1e-9, i.e. + 1/1,000,000,000 or 0.000000001. Returns ------- @@ -708,7 +726,8 @@ class InstrumentDefMsg(Record): @property def unit_of_measure_qty(self) -> int: """ - The contract size for each instrument, in combination with `unit_of_measure`. + The contract size for each instrument, in combination with + `unit_of_measure`. Returns ------- @@ -718,8 +737,8 @@ class InstrumentDefMsg(Record): @property def min_price_increment_amount(self) -> int: """ - The value currently under development by the venue. Converted to units of 1e-9, i.e. - 1/1,000,000,000 or 0.000000001. + The value currently under development by the venue. Converted to units + of 1e-9, i.e. 1/1,000,000,000 or 0.000000001. Returns ------- @@ -729,8 +748,8 @@ class InstrumentDefMsg(Record): @property def price_ratio(self) -> int: """ - The value used for price calculation in spread and leg pricing in units of 1e-9, - i.e. 1/1,000,000,000 or 0.000000001. + The value used for price calculation in spread and leg pricing in units + of 1e-9, i.e. 1/1,000,000,000 or 0.000000001. 
Returns ------- @@ -760,7 +779,8 @@ class InstrumentDefMsg(Record): @property def cleared_volume(self) -> int: """ - The total cleared volume of the instrument traded during the prior trading session. + The total cleared volume of the instrument traded during the prior + trading session. Returns ------- @@ -830,8 +850,8 @@ class InstrumentDefMsg(Record): @property def min_lot_size_round_lot(self) -> int: """ - The minimum quantity required for a round lot of the instrument. Multiples of - this quantity are also round lots. + The minimum quantity required for a round lot of the instrument. + Multiples of this quantity are also round lots. Returns ------- @@ -851,7 +871,8 @@ class InstrumentDefMsg(Record): @property def open_interest_qty(self) -> int: """ - The total open interest for the market at the close of the prior trading session. + The total open interest for the market at the close of the prior + trading session. Returns ------- @@ -871,8 +892,8 @@ class InstrumentDefMsg(Record): @property def decay_quantity(self) -> int: """ - The quantity that a contract will decay daily, after `decay_start_date` has - been reached. + The quantity that a contract will decay daily, after `decay_start_date` + has been reached. Retruns ------- @@ -933,7 +954,8 @@ class InstrumentDefMsg(Record): @property def channel_id(self) -> int: """ - The channel ID assigned by Databento as an incrementing integer starting at zero. + The channel ID assigned by Databento as an incrementing integer + starting at zero. Returns ------- @@ -1033,7 +1055,8 @@ class InstrumentDefMsg(Record): @property def unit_of_measure(self) -> str: """ - The unit of measure for the instrument’s original contract size, e.g. USD or LBS. + The unit of measure for the instrument’s original contract size, e.g. + USD or LBS. Returns ------- @@ -1073,8 +1096,8 @@ class InstrumentDefMsg(Record): @property def strike_price(self) -> int: """ - The strike price of the option. Converted to units of 1e-9, i.e. 
1/1,000,000,000 - or 0.000000001. + The strike price of the option. Converted to units of 1e-9, i.e. + 1/1,000,000,000 or 0.000000001. Returns ------- @@ -1114,7 +1137,8 @@ class InstrumentDefMsg(Record): @property def price_display_format(self) -> int: """ - The number of digits to the right of the tick mark, to display fractional prices. + The number of digits to the right of the tick mark, to display + fractional prices. Returns ------- @@ -1154,7 +1178,8 @@ class InstrumentDefMsg(Record): @property def security_update_action(self) -> str: """ - Indicates if the instrument definition has been added, modified, or deleted. + Indicates if the instrument definition has been added, modified, or + deleted. Returns ------- @@ -1204,7 +1229,8 @@ class InstrumentDefMsg(Record): @property def contract_multiplier_unit(self) -> int: """ - The type of `contract_multiplier`. Either `1` for hours, or `2` for days. + The type of `contract_multiplier`. Either `1` for hours, or `2` for + days. Returns ------- @@ -1233,15 +1259,13 @@ class InstrumentDefMsg(Record): """ class ImbalanceMsg(Record): - """ - An auction imbalance message. - """ + """An auction imbalance message.""" @property def ts_recv(self) -> int: """ - The capture-server-received timestamp expressed as the number of nanoseconds - since the UNIX epoch. + The capture-server-received timestamp expressed as the number of + nanoseconds since the UNIX epoch. Returns ------- @@ -1251,8 +1275,8 @@ class ImbalanceMsg(Record): @property def ref_price(self) -> int: """ - The price at which the imbalance shares are calculated, where every 1 unit corresponds to - 1e-9, i.e. 1/1,000,000,000 or 0.000000001. + The price at which the imbalance shares are calculated, where every 1 + unit corresponds to 1e-9, i.e. 1/1,000,000,000 or 0.000000001. 
Returns ------- @@ -1272,7 +1296,8 @@ class ImbalanceMsg(Record): @property def cont_book_clr_price(self) -> int: """ - The hypothetical auction-clearing price for both cross and continuous orders. + The hypothetical auction-clearing price for both cross and continuous + orders. Returns ------- @@ -1382,7 +1407,8 @@ class ImbalanceMsg(Record): @property def side(self) -> str: """ - The market side of the `total_imbalance_qty`. Can be `A`sk, `B`id, or `N`one. + The market side of the `total_imbalance_qty`. Can be `A`sk, `B`id, or + `N`one. Returns ------- @@ -1432,7 +1458,8 @@ class ImbalanceMsg(Record): @property def significant_imbalance(self) -> str: """ - Venue-specific character code. For Nasdaq, contains the raw Price Variation Indicator. + Venue-specific character code. For Nasdaq, contains the raw Price + Variation Indicator. Returns ------- @@ -1441,9 +1468,7 @@ class ImbalanceMsg(Record): """ class ErrorMsg(Record): - """ - An error message from the Databento Live Subscription Gateway (LSG). - """ + """An error message from the Databento Live Subscription Gateway (LSG).""" @property def err(self) -> str: @@ -1457,9 +1482,8 @@ class ErrorMsg(Record): """ class SymbolMappingMsg(Record): - """ - A symbol mapping message which maps a symbol of one `SType` to another. - """ + """A symbol mapping message which maps a symbol of one `SType` to + another.""" @property def stype_in_symbol(self) -> str: @@ -1484,8 +1508,8 @@ class SymbolMappingMsg(Record): @property def start_ts(self) -> int: """ - The start of the mapping interval expressed as the number of nanoseconds since - the UNIX epoch. + The start of the mapping interval expressed as the number of + nanoseconds since the UNIX epoch. Returns ------- @@ -1495,8 +1519,8 @@ class SymbolMappingMsg(Record): @property def end_ts(self) -> int: """ - The end of the mapping interval expressed as the number of nanoseconds since - the UNIX epoch. 
+ The end of the mapping interval expressed as the number of nanoseconds + since the UNIX epoch. Returns ------- @@ -1506,8 +1530,10 @@ class SymbolMappingMsg(Record): class SystemMsg(Record): """ - A non-error message from the Databento Live Subscription Gateway (LSG). Also used - for heartbeating. + A non-error message from the Databento Live Subscription Gateway (LSG). + + Also used for heartbeating. + """ @property @@ -1522,9 +1548,7 @@ class SystemMsg(Record): """ class DbnDecoder: - """ - A class for decoding DBN data to Python objects. - """ + """A class for decoding DBN data to Python objects.""" @property def buffer(self) -> bytes: @@ -1538,13 +1562,13 @@ class DbnDecoder: """ def decode( self, - ) -> Sequence[Record]: + ) -> List[Tuple[_DBNRecord, Optional[int]]]: """ Decode the buffered data into DBN records. Returns ------- - Sequence[Record] + List[Tuple[DBNRecord, Optional[int]]] Raises ------ @@ -1576,8 +1600,8 @@ class DbnDecoder: def decode_metadata(bytes: bytes) -> Metadata: """ - Decodes the given Python `bytes` to `Metadata`. Returns a `Metadata` object with - all the DBN metadata attributes. + Decodes the given Python `bytes` to `Metadata`. Returns a `Metadata` object + with all the DBN metadata attributes. Parameters ---------- @@ -1599,13 +1623,13 @@ def encode_metadata( symbols: Sequence[str], partial: Sequence[str], not_found: Sequence[str], - mappings: Sequence[SymbolMapping], + mappings: Sequence[object], end: Optional[int] = None, limit: Optional[int] = None, ) -> bytes: """ - Encodes the given metadata into the DBN metadata binary format. - Returns Python `bytes`. + Encodes the given metadata into the DBN metadata binary format. Returns + Python `bytes`. Parameters ---------- @@ -1614,7 +1638,8 @@ def encode_metadata( schema : str The data record schema. start : int - The UNIX nanosecond timestamp of the query start, or the first record if the file was split. 
+ The UNIX nanosecond timestamp of the query start, or the first record + if the file was split. stype_in : str The input symbology type to map from. stype_out: str @@ -1625,10 +1650,11 @@ def encode_metadata( Symbols that did not resolve for _at least one day_ in the query time range. not_found : Sequence[str] Symbols that did not resolve for _any_ day in the query time range. - mappings : Sequence[SymbolMapping] + mappings : Sequence[Dict[str, Any]] Symbol mappings containing a native symbol and its mapping intervals. end : Optional[int] - The UNIX nanosecond timestamp of the query end, or the last record if the file was split. + The UNIX nanosecond timestamp of the query end, or the last record + if the file was split. limit : Optional[int] The optional maximum number of records for the query. @@ -1658,9 +1684,11 @@ def update_encoded_metadata( file : BinaryIO The file handle to update. start : int - The UNIX nanosecond timestamp of the query start, or the first record if the file was split. + The UNIX nanosecond timestamp of the query start, or the + first record if the file was split. end : Optional[int] - The UNIX nanosecond timestamp of the query end, or the last record if the file was split. + The UNIX nanosecond timestamp of the query end, or the + last record if the file was split. limit : Optional[int] The optional maximum number of records for the query. @@ -1696,7 +1724,8 @@ def write_dbn_file( schema : str The data record schema. start : int - The UNIX nanosecond timestamp of the query start, or the first record if the file was split. + The UNIX nanosecond timestamp of the query start, or the + first record if the file was split. stype_in : str The input symbology type to map from. stype_out : str @@ -1704,7 +1733,8 @@ def write_dbn_file( records : Sequence[object] A sequence of DBN record objects. end : Optional[int] - The UNIX nanosecond timestamp of the query end, or the last record if the file was split. 
+ The UNIX nanosecond timestamp of the query end, or the + last record if the file was split. Raises ------ From 0e4135927ac83ca3ebfac398d1d214bf8c634620 Mon Sep 17 00:00:00 2001 From: Carter Green Date: Mon, 10 Apr 2023 17:34:00 -0500 Subject: [PATCH 07/22] MOD: Make `schema` and `stype_in` optional in DBN --- CHANGELOG.md | 1 + c/src/lib.rs | 4 +- python/src/lib.rs | 48 +++++-- rust/dbn/src/decode/dbn.rs | 27 +++- rust/dbn/src/decode/dbz.rs | 4 +- rust/dbn/src/encode/csv.rs | 121 +++++++++++++----- rust/dbn/src/encode/dbn.rs | 92 ++++++++------ rust/dbn/src/encode/json.rs | 8 +- rust/dbn/src/encode/mod.rs | 159 ++++++++++++++--------- rust/dbn/src/enums.rs | 31 ++--- rust/dbn/src/lib.rs | 2 + rust/dbn/src/macros.rs | 21 +++ rust/dbn/src/metadata.rs | 33 +++-- rust/dbn/src/python.rs | 176 +++++++++++--------------- tests/data/test_data.ohlcv-1h.dbn | Bin 318 -> 318 bytes tests/data/test_data.ohlcv-1h.dbn.zst | Bin 184 -> 184 bytes tests/data/test_data.ohlcv-1m.dbn | Bin 318 -> 318 bytes tests/data/test_data.ohlcv-1m.dbn.zst | Bin 162 -> 162 bytes tests/data/test_data.ohlcv-1s.dbn | Bin 318 -> 318 bytes tests/data/test_data.ohlcv-1s.dbn.zst | Bin 155 -> 155 bytes 20 files changed, 447 insertions(+), 280 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1eba87a..bbeb995 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## 0.5.0 - TBD +- Changed `schema` and `stype_in` to optional in `Metadata` to support live data - Added `RType` enum for exhaustive pattern matching - Added `&str` getters for more `c_char` array record fields - Changed `DbnDecoder.decode` to always return a list of tuples diff --git a/c/src/lib.rs b/c/src/lib.rs index 7638e1a..5577a13 100644 --- a/c/src/lib.rs +++ b/c/src/lib.rs @@ -44,9 +44,9 @@ pub unsafe extern "C" fn encode_metadata( let metadata = MetadataBuilder::new() .dataset(dataset) .start(start) - .stype_in(SType::ProductId) + .stype_in(Some(SType::ProductId)) .stype_out(SType::ProductId) - .schema(schema) 
+ .schema(Some(schema)) .build(); let buffer: &mut [u8] = slice::from_raw_parts_mut(buffer as *mut u8, length); let mut cursor = io::Cursor::new(buffer); diff --git a/python/src/lib.rs b/python/src/lib.rs index e40b29e..3376898 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -10,7 +10,7 @@ use dbn::{ BidAskPair, ErrorMsg, HasRType, ImbalanceMsg, InstrumentDefMsg, MboMsg, Mbp10Msg, Mbp1Msg, OhlcvMsg, RecordHeader, StatusMsg, SymbolMappingMsg, SystemMsg, TradeMsg, }, - rtype_ts_out_dispatch, + rtype_ts_out_dispatch, Metadata, }; /// A Python module wrapping dbn functions @@ -22,12 +22,10 @@ fn databento_dbn(_py: Python<'_>, m: &PyModule) -> PyResult<()> { m.add_class::() } // all functions exposed to Python need to be added here - m.add_wrapped(wrap_pyfunction!(dbn::python::decode_metadata))?; - m.add_wrapped(wrap_pyfunction!(dbn::python::encode_metadata))?; m.add_wrapped(wrap_pyfunction!(dbn::python::update_encoded_metadata))?; m.add_wrapped(wrap_pyfunction!(dbn::python::write_dbn_file))?; checked_add_class::(m)?; - checked_add_class::(m)?; + checked_add_class::(m)?; checked_add_class::(m)?; checked_add_class::(m)?; checked_add_class::(m)?; @@ -188,8 +186,8 @@ mod tests { buffer, &MetadataBuilder::new() .dataset("XNAS.ITCH".to_owned()) - .schema(Schema::Trades) - .stype_in(SType::Native) + .schema(Some(Schema::Trades)) + .stype_in(Some(SType::Native)) .stype_out(SType::ProductId) .start(0) .build(), @@ -227,8 +225,8 @@ mod tests { buffer, &MetadataBuilder::new() .dataset("XNAS.ITCH".to_owned()) - .schema(Schema::Ohlcv1S) - .stype_in(SType::Native) + .schema(Some(Schema::Ohlcv1S)) + .stype_in(Some(SType::Native)) .stype_out(SType::ProductId) .start(0) .build(), @@ -305,10 +303,22 @@ for _, ts_out in records[1:]: pyo3::py_run!( py, stype_in stype_out, - r#"from databento_dbn import decode_metadata, encode_metadata + r#"from databento_dbn import Metadata -metadata_bytes = encode_metadata("GLBX.MDP3", "mbo", 1, "native", "product_id", [], [], [], [], 2, None) 
-metadata = decode_metadata(metadata_bytes) +metadata = Metadata( + dataset="GLBX.MDP3", + schema="mbo", + start=1, + stype_in="native", + stype_out="product_id", + end=2, + symbols=[], + partial=[], + not_found=[], + mappings=[] +) +metadata_bytes = metadata.encode() +metadata = Metadata.decode(metadata_bytes) assert metadata.dataset == "GLBX.MDP3" assert metadata.schema == "mbo" assert metadata.start == 1 @@ -348,9 +358,21 @@ except Exception: setup(); Python::with_gil(|py| { py.run( - r#"from databento_dbn import DbnDecoder, encode_metadata + r#"from databento_dbn import DbnDecoder, Metadata -metadata_bytes = encode_metadata("GLBX.MDP3", "mbo", 1, "native", "product_id", [], [], [], [], 2, None) +metadata = Metadata( + dataset="GLBX.MDP3", + schema="mbo", + start=1, + stype_in="native", + stype_out="product_id", + end=2, + symbols=[], + partial=[], + not_found=[], + mappings=[] +) +metadata_bytes = bytes(metadata) decoder = DbnDecoder() decoder.write(metadata_bytes) decoder.write(bytes([0x04, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])) diff --git a/rust/dbn/src/decode/dbn.rs b/rust/dbn/src/decode/dbn.rs index 9fdfe99..44c286d 100644 --- a/rust/dbn/src/decode/dbn.rs +++ b/rust/dbn/src/decode/dbn.rs @@ -18,6 +18,7 @@ use crate::{ record::{HasRType, RecordHeader}, record_ref::RecordRef, MappingInterval, Metadata, SymbolMapping, DBN_VERSION, METADATA_FIXED_LEN, NULL_END, + NULL_SCHEMA, NULL_STYPE, }; const DBN_PREFIX: &[u8] = b"DBN"; @@ -303,8 +304,16 @@ where .trim_end_matches('\0') .to_owned(); pos += crate::METADATA_DATASET_CSTR_LEN; - let schema = Schema::try_from(u16::from_le_slice(&buffer[pos..])) - .with_context(|| format!("Failed to read schema: '{}'", buffer[pos]))?; + + let raw_schema = u16::from_le_slice(&buffer[pos..]); + let schema = if raw_schema == NULL_SCHEMA { + None + } else { + Some( + Schema::try_from(raw_schema) + .with_context(|| format!("Failed to read schema: '{}'", buffer[pos]))?, + ) + }; pos 
+= mem::size_of::(); let start = u64::from_le_slice(&buffer[pos..]); pos += U64_SIZE; @@ -314,8 +323,14 @@ where pos += U64_SIZE; // skip deprecated record_count pos += U64_SIZE; - let stype_in = SType::try_from(buffer[pos]) - .with_context(|| format!("Failed to read stype_in: '{}'", buffer[pos]))?; + let stype_in = if buffer[pos] == NULL_STYPE { + None + } else { + Some( + SType::try_from(buffer[pos]) + .with_context(|| format!("Failed to read stype_in: '{}'", buffer[pos]))?, + ) + }; pos += mem::size_of::(); let stype_out = SType::try_from(buffer[pos]) .with_context(|| format!("Failed to read stype_out: '{}'", buffer[pos]))?; @@ -623,9 +638,9 @@ mod tests { &mut buffer, &MetadataBuilder::new() .dataset("XNAS.ITCH".to_owned()) - .schema(Schema::Mbo) + .schema(Some(Schema::Mbo)) .start(0) - .stype_in(SType::ProductId) + .stype_in(Some(SType::ProductId)) .stype_out(SType::ProductId) .build(), ) diff --git a/rust/dbn/src/decode/dbz.rs b/rust/dbn/src/decode/dbz.rs index 0442b95..7702292 100644 --- a/rust/dbn/src/decode/dbz.rs +++ b/rust/dbn/src/decode/dbz.rs @@ -250,8 +250,8 @@ impl MetadataDecoder { Ok(Metadata { version: 0, dataset, - schema, - stype_in, + schema: Some(schema), + stype_in: Some(stype_in), stype_out, start, end: NonZeroU64::new(end), diff --git a/rust/dbn/src/encode/csv.rs b/rust/dbn/src/encode/csv.rs index 59490a2..e7dd435 100644 --- a/rust/dbn/src/encode/csv.rs +++ b/rust/dbn/src/encode/csv.rs @@ -1,9 +1,18 @@ //! Encoding of DBN records into comma-separated values (CSV). use std::io; +use anyhow::anyhow; use streaming_iterator::StreamingIterator; use super::EncodeDbn; +use crate::{ + decode::DecodeDbn, + enums::{RType, Schema}, + record::{ + ImbalanceMsg, InstrumentDefMsg, MboMsg, Mbp10Msg, Mbp1Msg, OhlcvMsg, StatusMsg, TbboMsg, + TradeMsg, + }, +}; /// Type for encoding files and streams of DBN records in CSV. 
/// @@ -26,6 +35,13 @@ where .from_writer(writer); Self { writer: csv_writer } } + + fn encode_header(&mut self) -> anyhow::Result<()> { + R::serialize_header(&mut self.writer)?; + // end of line + self.writer.write_record(None::<&[u8]>)?; + Ok(()) + } } impl EncodeDbn for Encoder @@ -48,33 +64,81 @@ where } fn encode_records(&mut self, records: &[R]) -> anyhow::Result<()> { - R::serialize_headers(&mut self.writer)?; - // end of line - self.writer.write_record(None::<&[u8]>)?; + self.encode_header::()?; for record in records { if self.encode_record(record)? { - return Ok(()); + break; } } - self.writer.flush()?; + self.flush()?; Ok(()) } + /// Encodes a stream of DBN records. + /// + /// # Errors + /// This function returns an error if it's unable to write to the underlying writer + /// or there's a serialization error. fn encode_stream( &mut self, mut stream: impl StreamingIterator, ) -> anyhow::Result<()> { - R::serialize_headers(&mut self.writer)?; - // end of line - self.writer.write_record(None::<&[u8]>)?; + self.encode_header::()?; while let Some(record) = stream.next() { if self.encode_record(record)? { - return Ok(()); + break; } } - self.writer.flush()?; + self.flush()?; Ok(()) } + + fn flush(&mut self) -> anyhow::Result<()> { + Ok(self.writer.flush()?) + } + + /// Encodes DBN records directly from a DBN decoder. This overrides the default [`EncodeDbn`](super::EncodeDbn) + /// implementation because the CSV encoder has the additional constraint of only being able to encode a single schema in + /// a stream. + /// + /// # Errors + /// This function returns an error if it's unable to write to the underlying writer + /// or there's a serialization error.
+ fn encode_decoded(&mut self, mut decoder: D) -> anyhow::Result<()> { + let ts_out = decoder.metadata().ts_out; + if let Some(schema) = decoder.metadata().schema { + match schema { + Schema::Mbo => self.encode_header::(), + Schema::Mbp1 => self.encode_header::(), + Schema::Mbp10 => self.encode_header::(), + Schema::Tbbo => self.encode_header::(), + Schema::Trades => self.encode_header::(), + Schema::Ohlcv1S => self.encode_header::(), + Schema::Ohlcv1M => self.encode_header::(), + Schema::Ohlcv1H => self.encode_header::(), + Schema::Ohlcv1D => self.encode_header::(), + Schema::Definition => self.encode_header::(), + Schema::Statistics => return Err(anyhow!("Unsupported schema: statistics")), + Schema::Status => self.encode_header::(), + Schema::Imbalance => self.encode_header::(), + }?; + let rtype = RType::from(schema); + while let Some(record) = decoder.decode_record_ref()? { + if record.rtype().map_or(true, |r| r != rtype) { + return Err(anyhow!("Schema indicated {rtype:?}, but found record with rtype {:?}. Mixed schemas cannot be encoded in CSV.", record.rtype())); + } + // Safety: It's safe to cast to `WithTsOut` because we're passing in the `ts_out` + // from the metadata header. + if unsafe { self.encode_record_ref(record, ts_out)? } { + break; + } + } + self.flush()?; + Ok(()) + } else { + Err(anyhow!("Can't encode a DBN with mixed schemas in CSV")) + } + } } pub(crate) mod serialize { @@ -91,8 +155,8 @@ pub(crate) mod serialize { /// Because of the flat nature of CSVs, there are several limitations in the /// Rust CSV serde serialization library. This trait helps work around them. pub trait CsvSerialize: Serialize + fmt::Debug { - /// Write the headers to `csv_writer`. - fn serialize_headers(csv_writer: &mut Writer) -> csv::Result<()>; + /// Encode the header to `csv_writer`. + fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()>; /// Serialize the object to `csv_writer`. 
Allows custom behavior that would otherwise /// cause a runtime error, e.g. serializing a struct with array field. @@ -102,7 +166,7 @@ pub(crate) mod serialize { } impl CsvSerialize for MboMsg { - fn serialize_headers(csv_writer: &mut Writer) -> csv::Result<()> { + fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()> { [ "rtype", "publisher_id", @@ -125,7 +189,7 @@ pub(crate) mod serialize { } impl CsvSerialize for Mbp1Msg { - fn serialize_headers(csv_writer: &mut Writer) -> csv::Result<()> { + fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()> { [ "rtype", "publisher_id", @@ -153,7 +217,7 @@ pub(crate) mod serialize { } impl CsvSerialize for Mbp10Msg { - fn serialize_headers(csv_writer: &mut Writer) -> csv::Result<()> { + fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()> { [ "rtype", "publisher_id", @@ -262,7 +326,7 @@ pub(crate) mod serialize { } impl CsvSerialize for TradeMsg { - fn serialize_headers(csv_writer: &mut Writer) -> csv::Result<()> { + fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()> { [ "rtype", "publisher_id", @@ -284,7 +348,7 @@ pub(crate) mod serialize { } impl CsvSerialize for OhlcvMsg { - fn serialize_headers(csv_writer: &mut Writer) -> csv::Result<()> { + fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()> { [ "rtype", "publisher_id", @@ -302,7 +366,7 @@ pub(crate) mod serialize { } impl CsvSerialize for StatusMsg { - fn serialize_headers(csv_writer: &mut Writer) -> csv::Result<()> { + fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()> { [ "rtype", "publisher_id", @@ -320,7 +384,7 @@ pub(crate) mod serialize { } impl CsvSerialize for InstrumentDefMsg { - fn serialize_headers(csv_writer: &mut Writer) -> csv::Result<()> { + fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()> { [ "rtype", "publisher_id", @@ -394,7 +458,7 @@ pub(crate) mod serialize { } impl CsvSerialize for ImbalanceMsg { - fn serialize_headers(csv_writer: &mut Writer) -> csv::Result<()> { + 
fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()> { [ "rtype", "publisher_id", @@ -427,7 +491,7 @@ pub(crate) mod serialize { } impl CsvSerialize for ErrorMsg { - fn serialize_headers(csv_writer: &mut Writer) -> csv::Result<()> { + fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()> { ["rtype", "publisher_id", "product_id", "ts_event", "err"] .iter() .try_for_each(|header| csv_writer.write_field(header)) @@ -435,7 +499,7 @@ pub(crate) mod serialize { } impl CsvSerialize for SystemMsg { - fn serialize_headers(csv_writer: &mut Writer) -> csv::Result<()> { + fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()> { ["rtype", "publisher_id", "product_id", "ts_event", "msg"] .iter() .try_for_each(|header| csv_writer.write_field(header)) @@ -443,7 +507,7 @@ pub(crate) mod serialize { } impl CsvSerialize for SymbolMappingMsg { - fn serialize_headers(csv_writer: &mut Writer) -> csv::Result<()> { + fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()> { [ "rtype", "publisher_id", @@ -460,9 +524,9 @@ pub(crate) mod serialize { } impl CsvSerialize for WithTsOut { - fn serialize_headers(csv_writer: &mut Writer) -> csv::Result<()> { + fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()> { csv_writer.write_field("ts_out")?; - T::serialize_headers(csv_writer) + T::serialize_header(csv_writer) } fn serialize_to(&self, csv_writer: &mut Writer) -> csv::Result<()> { @@ -492,10 +556,9 @@ mod tests { fn extract_2nd_line(buffer: Vec) -> String { let output = String::from_utf8(buffer).expect("valid UTF-8"); - output - .split_once('\n') - .expect("two lines") - .1 + let (first, second) = output.split_once('\n').expect("two lines"); + assert!(!first.trim().is_empty()); + second .trim_end() // remove newline .to_owned() } diff --git a/rust/dbn/src/encode/dbn.rs b/rust/dbn/src/encode/dbn.rs index 303f81e..8e47d53 100644 --- a/rust/dbn/src/encode/dbn.rs +++ b/rust/dbn/src/encode/dbn.rs @@ -6,11 +6,11 @@ use std::{ }; use anyhow::{anyhow, 
Context}; -use streaming_iterator::StreamingIterator; use super::{zstd_encoder, DbnEncodable, EncodeDbn}; use crate::{ enums::Schema, Metadata, SymbolMapping, DBN_VERSION, NULL_END, NULL_LIMIT, NULL_RECORD_COUNT, + NULL_SCHEMA, NULL_STYPE, }; /// Type for encoding files and streams in Databento Binary Encoding (DBN). @@ -75,27 +75,8 @@ where } } - fn encode_records(&mut self, records: &[R]) -> anyhow::Result<()> { - for record in records { - if self.encode_record(record)? { - break; - } - } - self.writer.flush()?; - Ok(()) - } - - fn encode_stream( - &mut self, - mut stream: impl StreamingIterator, - ) -> anyhow::Result<()> { - while let Some(record) = stream.next() { - if self.encode_record(record)? { - break; - } - } - self.writer.flush()?; - Ok(()) + fn flush(&mut self) -> anyhow::Result<()> { + Ok(self.writer.flush()?) } } @@ -133,11 +114,14 @@ where let length = Self::calc_length(metadata); self.writer.write_all(length.to_le_bytes().as_slice())?; self.encode_fixed_len_cstr::<{ crate::METADATA_DATASET_CSTR_LEN }>(&metadata.dataset)?; - self.writer - .write_all((metadata.schema as u16).to_le_bytes().as_slice())?; + self.writer.write_all( + (metadata.schema.map(|s| s as u16).unwrap_or(NULL_SCHEMA)) + .to_le_bytes() + .as_slice(), + )?; self.encode_range_and_counts(metadata.start, metadata.end, metadata.limit)?; self.writer.write_all(&[ - metadata.stype_in as u8, + metadata.stype_in.map(|s| s as u8).unwrap_or(NULL_STYPE), metadata.stype_out as u8, metadata.ts_out as u8, ])?; @@ -319,8 +303,8 @@ mod tests { let metadata = Metadata { version: crate::DBN_VERSION, dataset: "GLBX.MDP3".to_owned(), - schema: Schema::Mbp10, - stype_in: SType::Native, + schema: Some(Schema::Mbp10), + stype_in: Some(SType::Native), stype_out: SType::ProductId, start: 1657230820000000000, end: NonZeroU64::new(1658960170000000000), @@ -436,8 +420,8 @@ mod tests { let orig_metadata = Metadata { version: crate::DBN_VERSION, dataset: "GLBX.MDP3".to_owned(), - schema: Schema::Mbo, - stype_in: 
SType::Smart, + schema: Some(Schema::Mbo), + stype_in: Some(SType::Smart), stype_out: SType::Native, start: 1657230820000000000, end: NonZeroU64::new(1658960170000000000), @@ -480,9 +464,9 @@ mod tests { fn test_encode_decode_nulls() { let metadata = MetadataBuilder::new() .dataset("XNAS.ITCH".to_owned()) - .schema(Schema::Mbo) + .schema(Some(Schema::Mbo)) .start(1697240529000000000) - .stype_in(SType::Native) + .stype_in(Some(SType::Native)) .stype_out(SType::ProductId) .build(); assert!(metadata.end.is_none()); @@ -498,9 +482,9 @@ mod tests { fn test_metadata_min_encoded_size() { let metadata = MetadataBuilder::new() .dataset("XNAS.ITCH".to_owned()) - .schema(Schema::Mbo) + .schema(Some(Schema::Mbo)) .start(1697240529000000000) - .stype_in(SType::Native) + .stype_in(Some(SType::Native)) .stype_out(SType::ProductId) .build(); let calc_length = MetadataEncoder::>::calc_length(&metadata); @@ -525,8 +509,9 @@ mod r#async { use tokio::io; use crate::{ - record::HasRType, Metadata, SymbolMapping, DBN_VERSION, NULL_END, NULL_LIMIT, - NULL_RECORD_COUNT, + record::HasRType, record_ref::RecordRef, rtype_ts_out_async_dispatch, Metadata, + SymbolMapping, DBN_VERSION, NULL_END, NULL_LIMIT, NULL_RECORD_COUNT, NULL_SCHEMA, + NULL_STYPE, }; /// An async encoder of DBN records. @@ -567,6 +552,26 @@ mod r#async { } } + /// Encodes a single DBN record referenced by `record_ref`. + /// + /// Returns `true` if the pipe was closed. + /// + /// # Safety + /// `ts_out` must be `false` if `record_ref` does not have an appended `ts_out`. + /// + /// # Errors + /// This function returns an error if it's unable to write to the underlying writer + /// or there's a serialization error. + pub async unsafe fn encode_ref( + &mut self, + record_ref: RecordRef<'_>, + ts_out: bool, + ) -> anyhow::Result { + rtype_ts_out_async_dispatch!(record_ref, ts_out, |rec| async move { + self.encode(rec).await + })? + } + /// Returns a mutable reference to the underlying writer.
pub fn get_mut(&mut self) -> &mut W { &mut self.writer @@ -617,11 +622,14 @@ mod r#async { self.writer.write_u32_le(length).await?; self.encode_fixed_len_cstr::<{ crate::METADATA_DATASET_CSTR_LEN }>(&metadata.dataset) .await?; - self.writer.write_u16_le(metadata.schema as u16).await?; + self.writer + .write_u16_le(metadata.schema.map(|s| s as u16).unwrap_or(NULL_SCHEMA)) + .await?; self.encode_range_and_counts(metadata.start, metadata.end, metadata.limit) .await?; - // self.writer.write_all(&[metadata.compression as u8])?; - self.writer.write_u8(metadata.stype_in as u8).await?; + self.writer + .write_u8(metadata.stype_in.map(|s| s as u8).unwrap_or(NULL_STYPE)) + .await?; self.writer.write_u8(metadata.stype_out as u8).await?; self.writer.write_u8(metadata.ts_out as u8).await?; // padding @@ -779,8 +787,8 @@ mod r#async { let metadata = Metadata { version: crate::DBN_VERSION, dataset: "GLBX.MDP3".to_owned(), - schema: Schema::Mbp10, - stype_in: SType::Native, + schema: Some(Schema::Mbp10), + stype_in: Some(SType::Native), stype_out: SType::ProductId, start: 1657230820000000000, end: NonZeroU64::new(1658960170000000000), @@ -909,9 +917,9 @@ mod r#async { async fn test_encode_decode_nulls() { let metadata = MetadataBuilder::new() .dataset("XNAS.ITCH".to_owned()) - .schema(Schema::Mbo) + .schema(Some(Schema::Mbo)) .start(1697240529000000000) - .stype_in(SType::Native) + .stype_in(Some(SType::Native)) .stype_out(SType::ProductId) .build(); assert!(metadata.end.is_none()); diff --git a/rust/dbn/src/encode/json.rs b/rust/dbn/src/encode/json.rs index 3bb6b3e..252d1f7 100644 --- a/rust/dbn/src/encode/json.rs +++ b/rust/dbn/src/encode/json.rs @@ -116,6 +116,10 @@ where self.writer.flush()?; Ok(()) } + + fn flush(&mut self) -> anyhow::Result<()> { + Ok(self.writer.flush()?) 
+ } } #[cfg(test)] @@ -475,11 +479,11 @@ mod tests { let metadata = Metadata { version: 1, dataset: "GLBX.MDP3".to_owned(), - schema: Schema::Ohlcv1H, + schema: Some(Schema::Ohlcv1H), start: 1662734705128748281, end: NonZeroU64::new(1662734720914876944), limit: None, - stype_in: SType::ProductId, + stype_in: Some(SType::ProductId), stype_out: SType::Native, ts_out: false, symbols: vec!["ESZ2".to_owned()], diff --git a/rust/dbn/src/encode/mod.rs b/rust/dbn/src/encode/mod.rs index a2b84bb..d4e636e 100644 --- a/rust/dbn/src/encode/mod.rs +++ b/rust/dbn/src/encode/mod.rs @@ -6,18 +6,15 @@ pub mod json; use std::{fmt, io}; -use anyhow::anyhow; use serde::Serialize; use streaming_iterator::StreamingIterator; use crate::{ decode::DecodeDbn, - enums::{Compression, Encoding, Schema}, - record::{ - HasRType, ImbalanceMsg, InstrumentDefMsg, MboMsg, Mbp10Msg, Mbp1Msg, OhlcvMsg, StatusMsg, - TbboMsg, TradeMsg, WithTsOut, - }, - Metadata, + enums::{Compression, Encoding}, + record::HasRType, + record_ref::RecordRef, + rtype_ts_out_dispatch, Metadata, }; use self::csv::serialize::CsvSerialize; @@ -26,9 +23,9 @@ use self::csv::serialize::CsvSerialize; pub trait DbnEncodable: HasRType + AsRef<[u8]> + CsvSerialize + fmt::Debug + Serialize {} impl DbnEncodable for T where T: HasRType + AsRef<[u8]> + CsvSerialize + fmt::Debug + Serialize {} -/// Trait for types that encode DBN records. +/// Trait for types that encode DBN records with mixed schemas. pub trait EncodeDbn { - /// Encode a single DBN record of type `R`. + /// Encodes a single DBN record of type `R`. /// /// Returns `true`if the pipe was closed. /// @@ -37,72 +34,79 @@ pub trait EncodeDbn { /// or there's a serialization error. fn encode_record(&mut self, record: &R) -> anyhow::Result; - /// Encode a slice of DBN records. + /// Encodes a slice of DBN records. /// /// # Errors /// This function returns an error if it's unable to write to the underlying writer /// or there's a serialization error. 
- fn encode_records(&mut self, records: &[R]) -> anyhow::Result<()>; + fn encode_records(&mut self, records: &[R]) -> anyhow::Result<()> { + for record in records { + if self.encode_record(record)? { + break; + } + } + self.flush()?; + Ok(()) + } - /// Encode a stream of DBN records. + /// Encodes a stream of DBN records. /// /// # Errors /// This function returns an error if it's unable to write to the underlying writer /// or there's a serialization error. fn encode_stream( &mut self, - stream: impl StreamingIterator, - ) -> anyhow::Result<()>; + mut stream: impl StreamingIterator, + ) -> anyhow::Result<()> { + while let Some(record) = stream.next() { + if self.encode_record(record)? { + break; + } + } + self.flush()?; + Ok(()) + } - /// Encode DBN records directly from a DBN decoder. + /// Flushes any buffered content to the true output. + /// + /// # Errors + /// This function returns an error if it's unable to flush the underlying writer. + fn flush(&mut self) -> anyhow::Result<()>; + + /// Encodes a single DBN record of type `R`. + /// + /// Returns `true` if the pipe was closed. + /// + /// # Safety + /// `ts_out` must be `false` if `record` does not have an appended `ts_out`. /// /// # Errors /// This function returns an error if it's unable to write to the underlying writer /// or there's a serialization error. - fn encode_decoded(&mut self, decoder: D) -> anyhow::Result<()> { - match (decoder.metadata().schema, decoder.metadata().ts_out) { - (Schema::Mbo, true) => { - self.encode_stream(decoder.decode_stream::>()?) - } - (Schema::Mbo, false) => self.encode_stream(decoder.decode_stream::()?), - (Schema::Mbp1, true) => { - self.encode_stream(decoder.decode_stream::>()?) - } - (Schema::Mbp1, false) => self.encode_stream(decoder.decode_stream::()?), - (Schema::Mbp10, true) => { - self.encode_stream(decoder.decode_stream::>()?)
- } - (Schema::Mbp10, false) => self.encode_stream(decoder.decode_stream::()?), - (Schema::Tbbo, true) => { - self.encode_stream(decoder.decode_stream::>()?) - } - (Schema::Tbbo, false) => self.encode_stream(decoder.decode_stream::()?), - (Schema::Trades, true) => { - self.encode_stream(decoder.decode_stream::>()?) - } - (Schema::Trades, false) => self.encode_stream(decoder.decode_stream::()?), - (Schema::Ohlcv1S | Schema::Ohlcv1M | Schema::Ohlcv1H | Schema::Ohlcv1D, true) => { - self.encode_stream(decoder.decode_stream::>()?) - } - (Schema::Ohlcv1S | Schema::Ohlcv1M | Schema::Ohlcv1H | Schema::Ohlcv1D, false) => { - self.encode_stream(decoder.decode_stream::()?) - } - (Schema::Definition, true) => { - self.encode_stream(decoder.decode_stream::>()?) - } - (Schema::Definition, false) => { - self.encode_stream(decoder.decode_stream::()?) - } - (Schema::Imbalance, true) => { - self.encode_stream(decoder.decode_stream::>()?) - } - (Schema::Imbalance, false) => { - self.encode_stream(decoder.decode_stream::()?) + unsafe fn encode_record_ref( + &mut self, + record: RecordRef, + ts_out: bool, + ) -> anyhow::Result { + rtype_ts_out_dispatch!(record, ts_out, |rec| self.encode_record(rec))? + } + + /// Encodes DBN records directly from a DBN decoder. + /// + /// # Errors + /// This function returns an error if it's unable to write to the underlying writer + /// or there's a serialization error. + fn encode_decoded(&mut self, mut decoder: D) -> anyhow::Result<()> { + let ts_out = decoder.metadata().ts_out; + while let Some(record) = decoder.decode_record_ref()? { + // Safety: It's safe to cast to `WithTsOut` because we're passing in the `ts_out` + // from the metadata header. + if unsafe { self.encode_record_ref(record, ts_out)? 
} { + break; } - (Schema::Status, true) => self.encode_stream(decoder.decode_stream::()?), - (Schema::Status, false) => self.encode_stream(decoder.decode_stream::()?), - (Schema::Statistics, _) => Err(anyhow!("Not implemented")), } + self.flush()?; + Ok(()) } } @@ -260,6 +264,22 @@ where ) -> anyhow::Result<()> { self.0.encode_stream(stream) } + + fn flush(&mut self) -> anyhow::Result<()> { + self.0.flush() + } + + unsafe fn encode_record_ref( + &mut self, + record: RecordRef, + ts_out: bool, + ) -> anyhow::Result { + self.0.encode_record_ref(record, ts_out) + } + + fn encode_decoded(&mut self, decoder: D) -> anyhow::Result<()> { + self.0.encode_decoded(decoder) + } } impl<'a, W> EncodeDbn for DynEncoderImpl<'a, W> @@ -293,6 +313,31 @@ macro_rules! encoder_enum_dispatch { $(Self::$variant(v) => v.encode_stream(stream),)* } } + + fn flush(&mut self) -> anyhow::Result<()> { + match self { + $(Self::$variant(v) => v.flush(),)* + } + } + + unsafe fn encode_record_ref( + &mut self, + record: RecordRef, + ts_out: bool, + ) -> anyhow::Result { + match self { + $(Self::$variant(v) => v.encode_record_ref(record, ts_out),)* + } + } + + fn encode_decoded( + &mut self, + decoder: D, + ) -> anyhow::Result<()> { + match self { + $(Self::$variant(v) => v.encode_decoded(decoder),)* + } + } }; } diff --git a/rust/dbn/src/enums.rs b/rust/dbn/src/enums.rs index 9dc7ff0..809e930 100644 --- a/rust/dbn/src/enums.rs +++ b/rust/dbn/src/enums.rs @@ -294,21 +294,22 @@ pub mod rtype { pub const MBO: u8 = 0xA0; /// Get the corresponding `rtype` for the given `schema`. 
- pub fn from(schema: Schema) -> u8 { - match schema { - Schema::Mbo => MBO, - Schema::Mbp1 => MBP_1, - Schema::Mbp10 => MBP_10, - Schema::Tbbo => MBP_1, - Schema::Trades => MBP_0, - Schema::Ohlcv1S => OHLCV_1S, - Schema::Ohlcv1M => OHLCV_1M, - Schema::Ohlcv1H => OHLCV_1H, - Schema::Ohlcv1D => OHLCV_1D, - Schema::Definition => INSTRUMENT_DEF, - Schema::Statistics => unimplemented!("Statistics is not yet supported"), - Schema::Status => STATUS, - Schema::Imbalance => IMBALANCE, + impl From for RType { + fn from(schema: Schema) -> Self { + match schema { + Schema::Mbo => RType::Mbo, + Schema::Mbp1 | Schema::Tbbo => RType::Mbp1, + Schema::Mbp10 => RType::Mbp10, + Schema::Trades => RType::Mbp0, + Schema::Ohlcv1S => RType::Ohlcv1S, + Schema::Ohlcv1M => RType::Ohlcv1M, + Schema::Ohlcv1H => RType::Ohlcv1H, + Schema::Ohlcv1D => RType::Ohlcv1D, + Schema::Definition => RType::InstrumentDef, + Schema::Statistics => unimplemented!("Statistics is not yet supported"), + Schema::Status => RType::Status, + Schema::Imbalance => RType::Imbalance, + } } } diff --git a/rust/dbn/src/lib.rs b/rust/dbn/src/lib.rs index 483a3ff..3a128ba 100644 --- a/rust/dbn/src/lib.rs +++ b/rust/dbn/src/lib.rs @@ -29,6 +29,8 @@ const SYMBOL_CSTR_LEN: usize = 22; const NULL_END: u64 = u64::MAX; const NULL_LIMIT: u64 = 0; const NULL_RECORD_COUNT: u64 = u64::MAX; +const NULL_SCHEMA: u16 = u16::MAX; +const NULL_STYPE: u8 = u8::MAX; /// The sentinel value for an unset or null price. pub const UNDEF_PRICE: i64 = i64::MAX; diff --git a/rust/dbn/src/macros.rs b/rust/dbn/src/macros.rs index d1afdcc..afe1c68 100644 --- a/rust/dbn/src/macros.rs +++ b/rust/dbn/src/macros.rs @@ -58,6 +58,27 @@ macro_rules! rtype_ts_out_dispatch { }}; } +/// Specializes a generic async function to all record types and dispatches based +/// on `rtype` and `ts_out`. +/// +/// # Errors +/// This macro returns an error if the rtype is not recognized. +#[macro_export] +macro_rules!
rtype_ts_out_async_dispatch { + ($rec_ref:expr, $ts_out:expr, $generic_fn:expr $(,$arg:expr)*) => {{ + macro_rules! maybe_ts_out { + ($r:ty) => {{ + if $ts_out { + $generic_fn($rec_ref.get_unchecked::>() $(, $arg)*).await + } else { + $generic_fn(unsafe { $rec_ref.get_unchecked::<$r>() } $(, $arg)*).await + } + }}; + } + $crate::rtype_dispatch_base!($rec_ref, maybe_ts_out) + }}; +} + /// Specializes a generic function to all record types and dispatches based `rtype`. /// /// # Errors diff --git a/rust/dbn/src/metadata.rs b/rust/dbn/src/metadata.rs index 62e39fa..9173c5d 100644 --- a/rust/dbn/src/metadata.rs +++ b/rust/dbn/src/metadata.rs @@ -18,28 +18,33 @@ use crate::record::as_u8_slice; #[cfg_attr(feature = "python", pyo3::pyclass(module = "databento_dbn"))] #[cfg_attr(not(feature = "python"), derive(MockPyo3))] // bring `pyo3` attribute into scope pub struct Metadata { - /// The DBN schema version number. Newly-encoded DBN files will use [`crate::DBN_VERSION`]. + /// The DBN schema version number. Newly-encoded DBN files will use + /// [`crate::DBN_VERSION`]. #[pyo3(get)] pub version: u8, /// The dataset code. #[pyo3(get)] pub dataset: String, - /// The data record schema. Specifies which record type is stored in the Zstd-compressed DBN file. + /// The data record schema. Specifies which record types are in the DBN stream. + /// `None` indicates the DBN stream _may_ contain more than one record type. #[pyo3(get)] - pub schema: Schema, - /// The UNIX nanosecond timestamp of the query start, or the first record if the file was split. + pub schema: Option, + /// The UNIX nanosecond timestamp of the query start, or the first record if the + /// file was split. #[pyo3(get)] pub start: u64, - /// The UNIX nanosecond timestamp of the query end, or the last record if the file was split. + /// The UNIX nanosecond timestamp of the query end, or the last record if the file + /// was split. 
#[pyo3(get)] pub end: Option, /// The optional maximum number of records for the query. #[pyo3(get)] #[serde(serialize_with = "serialize_as_raw")] pub limit: Option, - /// The input symbology type to map from. + /// The input symbology type to map from. `None` indicates a mix, such as in the + /// case of live data. #[pyo3(get)] - pub stype_in: SType, + pub stype_in: Option, /// The output symbology type to map to. #[pyo3(get)] pub stype_out: SType, @@ -67,9 +72,7 @@ pub struct Metadata { /// /// # Required fields /// - [`dataset`](Metadata::dataset) -/// - [`schema`](Metadata::schema) /// - [`start`](Metadata::start) -/// - [`stype_in`](Metadata::stype_in) /// - [`stype_out`](Metadata::stype_out) #[derive(Debug)] pub struct MetadataBuilder { @@ -125,7 +128,10 @@ impl MetadataBuilder { } /// Sets the [`schema`](Metadata::schema) and returns the builder. - pub fn schema(self, schema: Schema) -> MetadataBuilder { + pub fn schema( + self, + schema: Option, + ) -> MetadataBuilder, Start, StIn, StOut> { MetadataBuilder { version: self.version, dataset: self.dataset, @@ -175,7 +181,10 @@ impl MetadataBuilder { } /// Sets the [`stype_in`](Metadata::stype_in) and returns the builder. - pub fn stype_in(self, stype_in: SType) -> MetadataBuilder { + pub fn stype_in( + self, + stype_in: Option, + ) -> MetadataBuilder, StOut> { MetadataBuilder { version: self.version, dataset: self.dataset, @@ -243,7 +252,7 @@ impl MetadataBuilder { } } -impl MetadataBuilder { +impl MetadataBuilder, u64, Option, SType> { /// Constructs a [`Metadata`] object. The availability of this method indicates all /// required fields have been set. pub fn build(self) -> Metadata { diff --git a/rust/dbn/src/python.rs b/rust/dbn/src/python.rs index 87df15b..3f2c5fa 100644 --- a/rust/dbn/src/python.rs +++ b/rust/dbn/src/python.rs @@ -29,65 +29,6 @@ use crate::{ }; use crate::{MappingInterval, Metadata, SymbolMapping}; -/// Decodes the given Python `bytes` to `Metadata`. 
Returns a `Metadata` object with -/// all the DBN metadata attributes. -/// -/// # Errors -/// This function returns an error if the metadata cannot be parsed from `bytes`. -#[pyfunction] -pub fn decode_metadata(bytes: &PyBytes) -> PyResult { - let reader = io::BufReader::new(bytes.as_bytes()); - Ok(DynDecoder::inferred_with_buffer(reader) - .map_err(to_val_err)? - .metadata() - .clone()) -} - -/// Encodes the given metadata into the DBN metadata binary format. -/// Returns Python `bytes`. -/// -/// # Errors -/// This function returns an error if any of the enum arguments cannot be converted to -/// their Rust equivalents. It will also return an error if there's an issue writing -/// the encoded metadata to bytes. -#[pyfunction] -pub fn encode_metadata( - py: Python<'_>, - dataset: String, - schema: Schema, - start: u64, - stype_in: SType, - stype_out: SType, - symbols: Vec, - partial: Vec, - not_found: Vec, - mappings: Vec, - end: Option, - limit: Option, -) -> PyResult> { - let metadata = MetadataBuilder::new() - .dataset(dataset) - .schema(schema) - .start(start) - .end(NonZeroU64::new(end.unwrap_or(0))) - .limit(NonZeroU64::new(limit.unwrap_or(0))) - .stype_in(stype_in) - .stype_out(stype_out) - .symbols(symbols) - .partial(partial) - .not_found(not_found) - .mappings(mappings) - .build(); - let mut encoded = Vec::with_capacity(1024); - MetadataEncoder::new(&mut encoded) - .encode(&metadata) - .map_err(|e| { - println!("{e:?}"); - to_val_err(e) - })?; - Ok(PyBytes::new(py, encoded.as_slice()).into()) -} - /// Updates existing fields that have already been written to the given file. 
#[pyfunction] pub fn update_encoded_metadata( @@ -124,38 +65,24 @@ pub fn write_dbn_file( _py: Python<'_>, file: PyFileLike, compression: Compression, - dataset: String, - schema: Schema, - start: u64, - stype_in: SType, - stype_out: SType, + metadata: &Metadata, records: Vec<&PyAny>, - end: Option, ) -> PyResult<()> { - let mut metadata_builder = MetadataBuilder::new() - .schema(schema) - .dataset(dataset) - .stype_in(stype_in) - .stype_out(stype_out) - .start(start); - if let Some(end) = end { - metadata_builder = metadata_builder.end(NonZeroU64::new(end)) - } - let metadata = metadata_builder.build(); let writer = DynWriter::new(file, compression).map_err(to_val_err)?; - let encoder = dbn::Encoder::new(writer, &metadata).map_err(to_val_err)?; - match schema { - Schema::Mbo => encode_pyrecs::(encoder, &records), - Schema::Mbp1 => encode_pyrecs::(encoder, &records), - Schema::Mbp10 => encode_pyrecs::(encoder, &records), - Schema::Tbbo => encode_pyrecs::(encoder, &records), - Schema::Trades => encode_pyrecs::(encoder, &records), - Schema::Ohlcv1S | Schema::Ohlcv1M | Schema::Ohlcv1H | Schema::Ohlcv1D => { - encode_pyrecs::(encoder, &records) - } - Schema::Definition => encode_pyrecs::(encoder, &records), - Schema::Imbalance => encode_pyrecs::(encoder, &records), - Schema::Statistics | Schema::Status => Err(PyValueError::new_err( + let encoder = dbn::Encoder::new(writer, metadata).map_err(to_val_err)?; + match metadata.schema { + Some(Schema::Mbo) => encode_pyrecs::(encoder, &records), + Some(Schema::Mbp1) => encode_pyrecs::(encoder, &records), + Some(Schema::Mbp10) => encode_pyrecs::(encoder, &records), + Some(Schema::Tbbo) => encode_pyrecs::(encoder, &records), + Some(Schema::Trades) => encode_pyrecs::(encoder, &records), + Some(Schema::Ohlcv1S) + | Some(Schema::Ohlcv1M) + | Some(Schema::Ohlcv1H) + | Some(Schema::Ohlcv1D) => encode_pyrecs::(encoder, &records), + Some(Schema::Definition) => encode_pyrecs::(encoder, &records), + Some(Schema::Imbalance) => 
encode_pyrecs::(encoder, &records), + _ => Err(PyValueError::new_err( "Unsupported schema type for writing DBN files", )), } @@ -203,16 +130,44 @@ impl<'source> FromPyObject<'source> for PyFileLike { #[pymethods] impl Metadata { + #[new] + fn py_new( + dataset: String, + start: u64, + stype_out: SType, + symbols: Vec, + partial: Vec, + not_found: Vec, + mappings: Vec, + schema: Option, + stype_in: Option, + end: Option, + limit: Option, + ts_out: Option, + ) -> Metadata { + MetadataBuilder::new() + .dataset(dataset) + .start(start) + .stype_out(stype_out) + .symbols(symbols) + .partial(partial) + .not_found(not_found) + .mappings(mappings) + .schema(schema) + .stype_in(stype_in) + .end(NonZeroU64::new(end.unwrap_or_default())) + .limit(NonZeroU64::new(limit.unwrap_or_default())) + .ts_out(ts_out.unwrap_or_default()) + .build() + } + fn __repr__(&self) -> String { format!("{self:?}") } /// Encodes Metadata back into DBN format. fn __bytes__(&self, py: Python<'_>) -> PyResult> { - let mut buffer = Vec::new(); - let mut encoder = MetadataEncoder::new(&mut buffer); - encoder.encode(self).map_err(to_val_err)?; - Ok(PyBytes::new(py, buffer.as_slice()).into()) + self.py_encode(py) } #[getter] @@ -223,6 +178,24 @@ impl Metadata { } res } + + #[pyo3(name = "decode")] + #[staticmethod] + fn py_decode(bytes: &PyBytes) -> PyResult { + let reader = io::BufReader::new(bytes.as_bytes()); + Ok(DynDecoder::inferred_with_buffer(reader) + .map_err(to_val_err)? 
+ .metadata() + .clone()) + } + + #[pyo3(name = "encode")] + fn py_encode(&self, py: Python<'_>) -> PyResult> { + let mut buffer = Vec::new(); + let mut encoder = MetadataEncoder::new(&mut buffer); + encoder.encode(self).map_err(to_val_err)?; + Ok(PyBytes::new(py, buffer.as_slice()).into()) + } } impl IntoPy for SymbolMapping { @@ -1141,6 +1114,7 @@ mod tests { use super::*; use crate::decode::{dbn, DecodeDbn}; + use crate::metadata::MetadataBuilder; const DBN_PATH: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../tests/data"); @@ -1216,18 +1190,20 @@ mod tests { let mock_file = MockPyFile::new(); let output_buf = mock_file.inner(); let mock_file = Py::new(py, mock_file).unwrap().into_py(py); + let metadata = MetadataBuilder::new() + .dataset(DATASET.to_owned()) + .schema(Some($schema)) + .start(0) + .stype_in(Some(STYPE)) + .stype_out(STYPE) + .build(); // Call target function write_dbn_file( py, mock_file.extract(py).unwrap(), Compression::ZStd, - DATASET.to_owned(), - $schema, - 0, - STYPE, - STYPE, + &metadata, recs.iter().map(|r| r.as_ref(py)).collect(), - None, ) .unwrap(); @@ -1244,9 +1220,9 @@ mod tests { // contents let py_decoder = dbn::Decoder::with_zstd(Cursor::new(&output_buf)).unwrap(); let metadata = py_decoder.metadata().clone(); - assert_eq!(metadata.schema, $schema); + assert_eq!(metadata.schema, Some($schema)); assert_eq!(metadata.dataset, DATASET); - assert_eq!(metadata.stype_in, STYPE); + assert_eq!(metadata.stype_in, Some(STYPE)); assert_eq!(metadata.stype_out, STYPE); let decoder = dbn::Decoder::from_zstd_file(format!( "{DBN_PATH}/test_data.{}.dbn.zst", diff --git a/tests/data/test_data.ohlcv-1h.dbn b/tests/data/test_data.ohlcv-1h.dbn index 9b3fde0caa066d6683c58b15b73d3e7834b165ff..24b2fa942172f2326632e7a8924de0b79d8b482b 100644 GIT binary patch delta 17 ZcmdnTw2x`Rc}As)7t9%zCUY>F0RTGI1&{y$ delta 17 ZcmdnTw2x`Rc}BsB7t9$2Cvz~G0RTDI1#SQU diff --git a/tests/data/test_data.ohlcv-1h.dbn.zst b/tests/data/test_data.ohlcv-1h.dbn.zst index 
37a7257fe49cfbe2f8905f4cb5d72c92ecc98cdf..3fa9e7fb40df879fa437cbbdad0c2141d138308c 100644 GIT binary patch delta 90 zcmdnNxPy^PW2^pOmIxPChL(w3>T>@X85kKD7+ixr48aT@dlN=pAT~9zFyd31=0|sM)gTMh{8al=JOJ-)7_a~U delta 90 zcmdnNxPy^PW2^pOmIxPChLnk1>N1Rs3=9me!5)STAO?@U2_r8Mo0?b{@d-}!cTwkM qa2C8B)gXAxF!`fSXf!NfZYkvK{jkh9t3h#j;CO)R)3^lw5_<%-41a`Ut0CwjY5C8xG delta 96 zcmZ3)xQLNUW2^pOmWU}V3`-_*smm}jGB7Z>274GXfEYaXCXBp5Y-(a*#3wk>Uqe@b x!PvTi$yt?Q+BUHUlbN$Mzkc7wTM<2l_dpRd^MSjkxEa#e1lWLje_2?!002%D87Ke% diff --git a/tests/data/test_data.ohlcv-1s.dbn b/tests/data/test_data.ohlcv-1s.dbn index 3fe70eee4bff074ab8e9dbf51d725ccf1ceee5da..79d9000148b34bfff517a452aedd277d93b53c4f 100644 GIT binary patch delta 17 ZcmdnTw2x`Rc}9ha7t9$ICUY>F0RTF<1&jay delta 17 ZcmdnTw2x`Rc}BsB7t9$2Cvz~G0RTDI1#SQU diff --git a/tests/data/test_data.ohlcv-1s.dbn.zst b/tests/data/test_data.ohlcv-1s.dbn.zst index 5d9d27b2193ef9368c24446dcc75b3be2f37e62b..a8135ccbd40cfef34c43fb28add45d1a6c962adc 100644 GIT binary patch delta 74 zcmbQuIGd46W2^pOmWUV@2A7Fk>T>@X85kKD7+ixr48aT@dlN=pAT~9zFyd2~=&Qge YFfmVBfJKI-kMW08gDg<7SJm@#08h0LRsaA1 delta 74 zcmbQuIGd46W2^pOmWUV@29=3i>N1Rs3=9me!5)STAO?@U2_r8Mo0?b{@d-}!S6~#J aSRgIL#@4WVsyM?L9t9qts=y`NJ^%nHOb_G$ From b14251532c505c1e26a29920fc9426c66aa30678 Mon Sep 17 00:00:00 2001 From: Carter Green Date: Wed, 12 Apr 2023 15:53:32 -0500 Subject: [PATCH 08/22] ADD: Add statistics schema to DBN --- CHANGELOG.md | 2 + python/src/lib.rs | 3 +- rust/dbn/src/encode/csv.rs | 72 +++++++++++++++++++++++++----- rust/dbn/src/encode/json.rs | 39 +++++++++++++++- rust/dbn/src/enums.rs | 61 ++++++++++++++++++++++--- rust/dbn/src/macros.rs | 1 + rust/dbn/src/python.rs | 60 +++++++++++++++++++++++-- rust/dbn/src/record.rs | 89 ++++++++++++++++++++++++++++++++++++- 8 files changed, 303 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bbeb995..5229e8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,10 +1,12 @@ # Changelog ## 0.5.0 - TBD +- Added support for Statistics 
schema - Changed `schema` and `stype_in` to optional in `Metadata` to support live data - Added `RType` enum for exhaustive pattern matching - Added `&str` getters for more `c_char` array record fields - Changed `DbnDecoder.decode` to always return a list of tuples +- Fixed value associated with `Side::None` - Fixed issue with decoding partial records in Python `DbnDecoder` - Fixed missing type hint for Metadata bytes support diff --git a/python/src/lib.rs b/python/src/lib.rs index 3376898..8eca6c3 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -8,7 +8,7 @@ use dbn::{ python::to_val_err, record::{ BidAskPair, ErrorMsg, HasRType, ImbalanceMsg, InstrumentDefMsg, MboMsg, Mbp10Msg, Mbp1Msg, - OhlcvMsg, RecordHeader, StatusMsg, SymbolMappingMsg, SystemMsg, TradeMsg, + OhlcvMsg, RecordHeader, StatMsg, StatusMsg, SymbolMappingMsg, SystemMsg, TradeMsg, }, rtype_ts_out_dispatch, Metadata, }; @@ -39,6 +39,7 @@ fn databento_dbn(_py: Python<'_>, m: &PyModule) -> PyResult<()> { checked_add_class::(m)?; checked_add_class::(m)?; checked_add_class::(m)?; + checked_add_class::(m)?; Ok(()) } diff --git a/rust/dbn/src/encode/csv.rs b/rust/dbn/src/encode/csv.rs index e7dd435..a649522 100644 --- a/rust/dbn/src/encode/csv.rs +++ b/rust/dbn/src/encode/csv.rs @@ -9,8 +9,8 @@ use crate::{ decode::DecodeDbn, enums::{RType, Schema}, record::{ - ImbalanceMsg, InstrumentDefMsg, MboMsg, Mbp10Msg, Mbp1Msg, OhlcvMsg, StatusMsg, TbboMsg, - TradeMsg, + ImbalanceMsg, InstrumentDefMsg, MboMsg, Mbp10Msg, Mbp1Msg, OhlcvMsg, StatMsg, StatusMsg, + TbboMsg, TradeMsg, }, }; @@ -113,12 +113,11 @@ where Schema::Mbp10 => self.encode_header::(), Schema::Tbbo => self.encode_header::(), Schema::Trades => self.encode_header::(), - Schema::Ohlcv1S => self.encode_header::(), - Schema::Ohlcv1M => self.encode_header::(), - Schema::Ohlcv1H => self.encode_header::(), - Schema::Ohlcv1D => self.encode_header::(), + Schema::Ohlcv1S | Schema::Ohlcv1M | Schema::Ohlcv1H | Schema::Ohlcv1D => { + 
self.encode_header::() + } Schema::Definition => self.encode_header::(), - Schema::Statistics => return Err(anyhow!("Unsupported schema: statistics")), + Schema::Statistics => self.encode_header::(), Schema::Status => self.encode_header::(), Schema::Imbalance => self.encode_header::(), }?; @@ -149,7 +148,7 @@ pub(crate) mod serialize { use crate::record::{ ErrorMsg, HasRType, ImbalanceMsg, InstrumentDefMsg, MboMsg, Mbp10Msg, Mbp1Msg, OhlcvMsg, - StatusMsg, SymbolMappingMsg, SystemMsg, TradeMsg, WithTsOut, + StatMsg, StatusMsg, SymbolMappingMsg, SystemMsg, TradeMsg, WithTsOut, }; /// Because of the flat nature of CSVs, there are several limitations in the @@ -490,6 +489,29 @@ pub(crate) mod serialize { } } + impl CsvSerialize for StatMsg { + fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()> { + [ + "rtype", + "publisher_id", + "product_id", + "ts_event", + "ts_recv", + "ts_ref", + "price", + "quantity", + "sequence", + "ts_in_delta", + "stat_type", + "channel_id", + "update_action", + "stat_flags", + ] + .iter() + .try_for_each(|header| csv_writer.write_field(header)) + } + } + impl CsvSerialize for ErrorMsg { fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()> { ["rtype", "publisher_id", "product_id", "ts_event", "err"] @@ -543,10 +565,13 @@ mod tests { use super::*; use crate::{ encode::test_data::{VecStream, BID_ASK, RECORD_HEADER}, - enums::{InstrumentClass, SecurityUpdateAction, UserDefinedInstrument}, + enums::{ + InstrumentClass, SecurityUpdateAction, StatType, StatUpdateAction, + UserDefinedInstrument, + }, record::{ str_to_c_chars, ImbalanceMsg, InstrumentDefMsg, MboMsg, Mbp10Msg, Mbp1Msg, OhlcvMsg, - StatusMsg, TradeMsg, WithTsOut, + StatMsg, StatusMsg, TradeMsg, WithTsOut, }, }; @@ -826,7 +851,7 @@ mod tests { } #[test] - fn test_encode_imbalance_records() { + fn test_imbalance_encode_records() { let data = vec![ImbalanceMsg { hd: RECORD_HEADER, ts_recv: 1, @@ -862,4 +887,29 @@ mod tests { 
format!("{HEADER_CSV},1,2,3,4,5,6,7,8,9,10,11,12,13,B,A,14,15,16,A,N") ); } + + #[test] + fn test_stat_encode_stream() { + let data = vec![StatMsg { + hd: RECORD_HEADER, + ts_recv: 1, + ts_ref: 2, + price: 3, + quantity: 0, + sequence: 4, + ts_in_delta: 5, + stat_type: StatType::OpeningPrice as u16, + channel_id: 7, + update_action: StatUpdateAction::New as u8, + stat_flags: 0, + _dummy: Default::default(), + }]; + let mut buffer = Vec::new(); + let writer = BufWriter::new(&mut buffer); + Encoder::new(writer) + .encode_stream(VecStream::new(data)) + .unwrap(); + let line = extract_2nd_line(buffer); + assert_eq!(line, format!("{HEADER_CSV},1,2,3,0,4,5,1,7,1,0")); + } } diff --git a/rust/dbn/src/encode/json.rs b/rust/dbn/src/encode/json.rs index 252d1f7..b3fab43 100644 --- a/rust/dbn/src/encode/json.rs +++ b/rust/dbn/src/encode/json.rs @@ -129,10 +129,13 @@ mod tests { use super::*; use crate::{ encode::test_data::{VecStream, BID_ASK, RECORD_HEADER}, - enums::{InstrumentClass, SType, Schema, SecurityUpdateAction, UserDefinedInstrument}, + enums::{ + InstrumentClass, SType, Schema, SecurityUpdateAction, StatType, StatUpdateAction, + UserDefinedInstrument, + }, record::{ str_to_c_chars, ImbalanceMsg, InstrumentDefMsg, MboMsg, Mbp10Msg, Mbp1Msg, OhlcvMsg, - StatusMsg, TradeMsg, WithTsOut, + StatMsg, StatusMsg, TradeMsg, WithTsOut, }, MappingInterval, SymbolMapping, }; @@ -474,6 +477,38 @@ mod tests { ); } + #[test] + fn test_stat_write_json() { + let data = vec![StatMsg { + hd: RECORD_HEADER, + ts_recv: 1, + ts_ref: 2, + price: 3, + quantity: 0, + sequence: 4, + ts_in_delta: 5, + stat_type: StatType::OpeningPrice as u16, + channel_id: 7, + update_action: StatUpdateAction::New as u8, + stat_flags: 0, + _dummy: Default::default(), + }]; + let slice_res = write_json_to_string(data.as_slice(), false); + let stream_res = write_json_stream_to_string(data, false); + + assert_eq!(slice_res, stream_res); + assert_eq!( + slice_res, + format!( + "{{{HEADER_JSON},{}}}\n", + 
concat!( + r#""ts_recv":"1","ts_ref":"2","price":3,"quantity":0,"sequence":4,"#, + r#""ts_in_delta":5,"stat_type":1,"channel_id":7,"update_action":1,"stat_flags":0"#, + ) + ) + ); + } + #[test] fn test_metadata_write_json() { let metadata = Metadata { diff --git a/rust/dbn/src/enums.rs b/rust/dbn/src/enums.rs index 809e930..308f7ac 100644 --- a/rust/dbn/src/enums.rs +++ b/rust/dbn/src/enums.rs @@ -16,7 +16,7 @@ pub enum Side { /// A buy order. Bid = b'B', /// None or unknown. - None, + None = b'N', } impl From for char { @@ -254,6 +254,8 @@ pub mod rtype { SymbolMapping = SYMBOL_MAPPING, /// A non-error message. Also used for heartbeats. System = SYSTEM, + /// Statistics from the publisher (not calculated by Databento). + Statistics = STATISTICS, /// Market by order. Mbo = MBO, } @@ -290,6 +292,8 @@ pub mod rtype { pub const SYMBOL_MAPPING: u8 = 0x16; /// A non-error message. Also used for heartbeats. pub const SYSTEM: u8 = 0x17; + /// Statistics from the publisher (not calculated by Databento). + pub const STATISTICS: u8 = 0x18; /// Market by order. pub const MBO: u8 = 0xA0; @@ -306,7 +310,7 @@ pub mod rtype { Schema::Ohlcv1H => RType::Ohlcv1H, Schema::Ohlcv1D => RType::Ohlcv1D, Schema::Definition => RType::InstrumentDef, - Schema::Statistics => unimplemented!("Statistics is not yet supported"), + Schema::Statistics => RType::Statistics, Schema::Status => RType::Status, Schema::Imbalance => RType::Imbalance, } @@ -329,6 +333,7 @@ pub mod rtype { STATUS => Some(Schema::Status), INSTRUMENT_DEF => Some(Schema::Definition), IMBALANCE => Some(Schema::Imbalance), + STATISTICS => Some(Schema::Statistics), MBO => Some(Schema::Mbo), _ => None, } @@ -360,7 +365,7 @@ pub enum Schema { Ohlcv1D = 8, /// Instrument definitions. Definition = 9, - #[doc(hidden)] + /// Additional data disseminated by publishers. Statistics = 10, /// Exchange status. #[doc(hidden)] @@ -370,7 +375,7 @@ pub enum Schema { } /// The number of [`Schema`]s. 
-pub const SCHEMA_COUNT: usize = 12; +pub const SCHEMA_COUNT: usize = 13; impl std::str::FromStr for Schema { type Err = ConversionError; @@ -550,13 +555,17 @@ pub mod flags { pub const MAYBE_BAD_BOOK: u8 = 1 << 2; } +/// The type of [`InstrumentDefMsg`](crate::record::InstrumentDefMsg) update. #[repr(u8)] #[derive(Clone, Copy, Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] -#[doc(hidden)] pub enum SecurityUpdateAction { + /// A new instrument definition. Add = b'A', + /// A modified instrument definition of an existing one. Modify = b'M', + /// Removal of an instrument definition. Delete = b'D', + #[doc(hidden)] #[deprecated = "Still present in legacy files."] Invalid = b'~', } @@ -566,3 +575,45 @@ impl Serialize for SecurityUpdateAction { serializer.serialize_char(char::from(*self as u8)) } } + +/// The type of [`StatMsg`](crate::record::StatMsg) update. +#[repr(u16)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] +pub enum StatType { + /// The price of the first trade of an instrument. `price` will be set. + OpeningPrice = 1, + /// The settlement price of an instrument. `price` will be set and `flags` indicate + /// whether the price is final or preliminary and actual or theoretical. + SettlementPrice = 2, + /// The lowest trade price of an instrument during the trading session. `price` will + /// be set. + TradingSessionLowPrice = 3, + /// The highest trade price of an instrument during the trading session. `price` will + /// be set. + TradingSessionHighPrice = 4, + /// The number of contracts cleared for an instrument on the previous trading date. + /// `quantity` will be set. + ClearedVolume = 5, + /// The lowest offer price for an instrument during the trading session. `price` + /// will be set. + LowestOffer = 6, + /// The highest bid price for an instrument during the trading session. `price` + /// will be set. + HighestBid = 7, + /// The current number of outstanding contracts of an instrument. 
`quantity` will + /// be set. + OpenInterest = 8, + /// The volume-weighted average price (VWAP) for a fixing period. `price` will be + /// set. + FixingPrice = 9, +} + +/// The type of [`StatMsg`](crate::record::StatMsg) update. +#[repr(u8)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] +pub enum StatUpdateAction { + /// A newly added statistic. + New = 1, + /// A deleted statistic. + Delete = 2, +} diff --git a/rust/dbn/src/macros.rs b/rust/dbn/src/macros.rs index afe1c68..6c39245 100644 --- a/rust/dbn/src/macros.rs +++ b/rust/dbn/src/macros.rs @@ -26,6 +26,7 @@ macro_rules! rtype_dispatch_base { RType::Error => $handler!(ErrorMsg), RType::SymbolMapping => $handler!(SymbolMappingMsg), RType::System => $handler!(SystemMsg), + RType::Statistics => $handler!(StatMsg), RType::Mbo => $handler!(MboMsg), }), Err(e) => Err(e), diff --git a/rust/dbn/src/python.rs b/rust/dbn/src/python.rs index 3f2c5fa..34be6d4 100644 --- a/rust/dbn/src/python.rs +++ b/rust/dbn/src/python.rs @@ -18,12 +18,15 @@ use crate::{ dbn::{self, MetadataEncoder}, DbnEncodable, DynWriter, EncodeDbn, }, - enums::{rtype, Compression, SType, Schema, SecurityUpdateAction, UserDefinedInstrument}, + enums::{ + rtype, Compression, SType, Schema, SecurityUpdateAction, StatUpdateAction, + UserDefinedInstrument, + }, metadata::MetadataBuilder, record::{ str_to_c_chars, BidAskPair, ErrorMsg, HasRType, ImbalanceMsg, InstrumentDefMsg, MboMsg, - Mbp10Msg, Mbp1Msg, OhlcvMsg, RecordHeader, StatusMsg, SymbolMappingMsg, SystemMsg, TbboMsg, - TradeMsg, WithTsOut, + Mbp10Msg, Mbp1Msg, OhlcvMsg, RecordHeader, StatMsg, StatusMsg, SymbolMappingMsg, SystemMsg, + TbboMsg, TradeMsg, WithTsOut, }, UNDEF_ORDER_SIZE, UNDEF_PRICE, }; @@ -82,7 +85,8 @@ pub fn write_dbn_file( | Some(Schema::Ohlcv1D) => encode_pyrecs::(encoder, &records), Some(Schema::Definition) => encode_pyrecs::(encoder, &records), Some(Schema::Imbalance) => encode_pyrecs::(encoder, &records), - _ => Err(PyValueError::new_err( + Some(Schema::Statistics) =>
encode_pyrecs::(encoder, &records), + Some(Schema::Status) | None => Err(PyValueError::new_err( "Unsupported schema type for writing DBN files", )), } @@ -998,6 +1002,54 @@ impl ImbalanceMsg { } } +#[pymethods] +impl StatMsg { + #[new] + fn py_new( + publisher_id: u16, + product_id: u32, + ts_event: u64, + ts_recv: u64, + ts_ref: u64, + price: i64, + quantity: i32, + sequence: u32, + ts_in_delta: i32, + stat_type: u16, + channel_id: u16, + update_action: Option, + stat_flags: Option, + ) -> Self { + Self { + hd: RecordHeader::new::(rtype::STATISTICS, publisher_id, product_id, ts_event), + ts_recv, + ts_ref, + price, + quantity, + sequence, + ts_in_delta, + stat_type, + channel_id, + update_action: update_action.unwrap_or(StatUpdateAction::New as u8), + stat_flags: stat_flags.unwrap_or_default(), + _dummy: Default::default(), + } + } + + fn __repr__(&self) -> String { + format!("{self:?}") + } + + fn __bytes__(&self) -> &[u8] { + self.as_ref() + } + + #[pyo3(name = "record_size")] + fn py_record_size(&self) -> usize { + self.record_size() + } +} + #[pymethods] impl ErrorMsg { #[new] diff --git a/rust/dbn/src/record.rs b/rust/dbn/src/record.rs index fbd68bb..eca0528 100644 --- a/rust/dbn/src/record.rs +++ b/rust/dbn/src/record.rs @@ -14,7 +14,8 @@ pub use dbn_macros::MockPyo3; use crate::{ enums::{ rtype::{self, RType}, - Action, InstrumentClass, MatchAlgorithm, SecurityUpdateAction, Side, UserDefinedInstrument, + Action, InstrumentClass, MatchAlgorithm, SecurityUpdateAction, Side, StatType, + StatUpdateAction, UserDefinedInstrument, }, error::ConversionError, }; @@ -581,6 +582,50 @@ pub struct ImbalanceMsg { pub _dummy: [c_char; 1], } +/// A statistics message. A catchall for various data disseminated by publishers. +/// The [`stat_type`](Self::stat_type) indicates the statistic contained in the message. 
+#[repr(C)] +#[derive(Clone, Debug, PartialEq, Eq, Serialize)] +#[cfg_attr(feature = "trivial_copy", derive(Copy))] +#[cfg_attr( + feature = "python", + pyo3::pyclass(get_all, set_all, module = "databento_dbn") +)] +pub struct StatMsg { + /// The common header. + pub hd: RecordHeader, + /// The capture-server-received timestamp expressed as the number of nanoseconds + /// since the UNIX epoch. + #[serde(serialize_with = "serialize_large_u64")] + pub ts_recv: u64, + /// Reference timestamp expressed as the number of nanoseconds since the UNIX epoch. + #[serde(serialize_with = "serialize_large_u64")] + pub ts_ref: u64, + /// The value for price statistics expressed as a signed integer where every 1 unit + /// corresponds to 1e-9, i.e. 1/1,000,000,000 or 0.000000001. + pub price: i64, + /// The value for non-price statistics. + pub quantity: i32, + /// The message sequence number assigned at the venue. + pub sequence: u32, + /// The delta of `ts_recv - ts_exchange_send`, max 2 seconds. + pub ts_in_delta: i32, + /// The type of statistic value contained in the message. Refer to the + /// [`StatType`](crate::enums::StatType) for variants. + pub stat_type: u16, + /// A channel ID within the venue. + pub channel_id: u16, + /// Indicates if the statistic is newly added or deleted. Deleted is only used for a + /// couple of stat types. + pub update_action: u8, + /// Additional flags associated with certain stat types. + pub stat_flags: u8, + // Filler for alignment + #[serde(skip)] + #[doc(hidden)] + pub _dummy: [c_char; 6], +} + /// An error message from the Databento Live Subscription Gateway (LSG). #[repr(C)] #[derive(Clone, Debug, PartialEq, Eq, Serialize)] @@ -937,6 +982,28 @@ impl InstrumentDefMsg { } } +impl StatMsg { + /// Tries to convert the raw `stat_type` to an enum. + /// + /// # Errors + /// This function returns an error if the `stat_type` field does not + /// contain a valid [`StatType`](crate::enums::StatType).
+ pub fn stat_type(&self) -> crate::error::Result { + StatType::try_from(self.stat_type) + .map_err(|_| ConversionError::TypeConversion("Invalid stat_type")) + } + + /// Tries to convert the raw `update_action` to an enum. + /// + /// # Errors + /// This function returns an error if the `update_action` field does not + /// contain a valid [`StatUpdateAction`](crate::enums::StatUpdateAction). + pub fn update_action(&self) -> crate::error::Result { + StatUpdateAction::try_from(self.update_action) + .map_err(|_| ConversionError::TypeConversion("Invalid update_action")) + } +} + impl ErrorMsg { /// Creates a new `ErrorMsg`. /// @@ -1327,6 +1394,26 @@ impl AsRef<[u8]> for ImbalanceMsg { } } +impl HasRType for StatMsg { + fn has_rtype(rtype: u8) -> bool { + rtype == rtype::STATISTICS + } + + fn header(&self) -> &RecordHeader { + &self.hd + } + + fn header_mut(&mut self) -> &mut RecordHeader { + &mut self.hd + } +} + +impl AsRef<[u8]> for StatMsg { + fn as_ref(&self) -> &[u8] { + unsafe { as_u8_slice(self) } + } +} + impl HasRType for ErrorMsg { fn has_rtype(rtype: u8) -> bool { rtype == rtype::ERROR From 39e977e78a6540b4c6c789d41bdddf9e0c02eae1 Mon Sep 17 00:00:00 2001 From: Carter Green Date: Thu, 13 Apr 2023 10:35:29 -0500 Subject: [PATCH 09/22] REF: Refactor DBN python --- python/src/dbn_decoder.rs | 289 +++++++++++++++++++++++++++++++++ python/src/encode.rs | 332 ++++++++++++++++++++++++++++++++++++++ python/src/lib.rs | 296 ++------------------------------- rust/dbn/src/python.rs | 322 +----------------------------------- 4 files changed, 638 insertions(+), 601 deletions(-) create mode 100644 python/src/dbn_decoder.rs create mode 100644 python/src/encode.rs diff --git a/python/src/dbn_decoder.rs b/python/src/dbn_decoder.rs new file mode 100644 index 0000000..8acfe7e --- /dev/null +++ b/python/src/dbn_decoder.rs @@ -0,0 +1,289 @@ +use std::io::{self, Write}; + +use pyo3::{prelude::*, types::PyTuple}; + +use dbn::{ + decode::dbn::{MetadataDecoder, RecordDecoder}, + 
python::to_val_err, + record::HasRType, + rtype_ts_out_dispatch, +}; + +#[pyclass(module = "databento_dbn")] +pub struct DbnDecoder { + buffer: io::Cursor>, + has_decoded_metadata: bool, + ts_out: bool, +} + +#[pymethods] +impl DbnDecoder { + #[new] + fn new() -> Self { + Self { + buffer: io::Cursor::default(), + has_decoded_metadata: false, + ts_out: false, + } + } + + fn write(&mut self, bytes: &[u8]) -> PyResult<()> { + self.buffer.write_all(bytes).map_err(to_val_err) + } + + fn buffer(&self) -> &[u8] { + self.buffer.get_ref().as_slice() + } + + fn decode(&mut self) -> PyResult> { + let mut recs = Vec::new(); + let orig_position = self.buffer.position(); + self.buffer.set_position(0); + if !self.has_decoded_metadata { + match MetadataDecoder::new(&mut self.buffer).decode() { + Ok(metadata) => { + self.ts_out = metadata.ts_out; + Python::with_gil(|py| recs.push((metadata, py.None()).into_py(py))); + self.has_decoded_metadata = true; + } + Err(err) => { + self.buffer.set_position(orig_position); + // haven't read enough data for metadata + return Err(to_val_err(err)); + } + } + } + let mut read_position = self.buffer.position() as usize; + let mut decoder = RecordDecoder::new(&mut self.buffer); + Python::with_gil(|py| -> PyResult<()> { + while let Some(rec) = decoder.decode_ref().map_err(to_val_err)? { + // Bug in clippy generates an error here. trivial_copy feature isn't enabled, + // but clippy thinks these records are `Copy` + fn push_rec>>( + rec: &R, + py: Python, + recs: &mut Vec>, + ) { + let pyrec = rec.clone().into_py(py); + recs.push( + // Convert non `WithTsOut` records to a (rec, None) + // for consistent typing + if pyrec + .as_ref(py) + .is_instance_of::() + .unwrap_or_default() + { + pyrec + } else { + (pyrec, py.None()).into_py(py) + }, + ) + } + + // Safety: It's safe to cast to `WithTsOut` because we're passing in the `ts_out` + // from the metadata header. 
+ if unsafe { rtype_ts_out_dispatch!(rec, self.ts_out, push_rec, py, &mut recs) } + .is_err() + { + return Err(to_val_err(format!( + "Invalid rtype {} found in record", + rec.header().rtype, + ))); + } + // keep track of position after last _successful_ decoding to ensure + // buffer is left in correct state in the case where one or more + // successful decodings is followed by a partial one, i.e. `decode_ref` + // returning `Ok(None)` + read_position = decoder.get_mut().position() as usize; + } + Ok(()) + }) + .map_err(|e| { + self.buffer.set_position(orig_position); + e + })?; + if recs.is_empty() { + self.buffer.set_position(orig_position); + } else { + self.shift_buffer(read_position); + } + Ok(recs) + } +} + +impl DbnDecoder { + fn shift_buffer(&mut self, read_position: usize) { + let inner_buf = self.buffer.get_mut(); + let length = inner_buf.len(); + let new_length = length - read_position; + inner_buf.drain(..read_position); + debug_assert_eq!(inner_buf.len(), new_length); + self.buffer.set_position(new_length as u64); + } +} + +#[cfg(test)] +mod tests { + use dbn::{ + encode::{dbn::Encoder, EncodeDbn}, + enums::{rtype, SType, Schema}, + record::{ErrorMsg, OhlcvMsg, RecordHeader}, + MetadataBuilder, + }; + use pyo3::{py_run, types::PyString}; + + use super::*; + use crate::tests::setup; + + #[test] + fn test_partial_records() { + setup(); + let mut decoder = DbnDecoder::new(); + let buffer = Vec::new(); + let mut encoder = Encoder::new( + buffer, + &MetadataBuilder::new() + .dataset("XNAS.ITCH".to_owned()) + .schema(Some(Schema::Trades)) + .stype_in(Some(SType::Native)) + .stype_out(SType::ProductId) + .start(0) + .build(), + ) + .unwrap(); + decoder.write(encoder.get_ref().as_slice()).unwrap(); + let metadata_pos = encoder.get_ref().len() as usize; + assert!(matches!(decoder.decode(), Ok(recs) if recs.len() == 1)); + assert!(decoder.has_decoded_metadata); + let rec = ErrorMsg::new(1680708278000000000, "Python"); + encoder.encode_record(&rec).unwrap(); + 
assert!(decoder.buffer.get_ref().is_empty()); + let record_pos = encoder.get_ref().len() as usize; + for i in metadata_pos..record_pos { + decoder.write(&encoder.get_ref()[i..i + 1]).unwrap(); + assert_eq!(decoder.buffer.get_ref().len(), i + 1 - metadata_pos); + // wrote last byte + if i == record_pos - 1 { + let res = decoder.decode(); + assert_eq!(record_pos - metadata_pos, std::mem::size_of_val(&rec)); + assert!(matches!(res, Ok(recs) if recs.len() == 1)); + } else { + let res = decoder.decode(); + assert!(matches!(res, Ok(recs) if recs.is_empty())); + } + } + } + + #[test] + fn test_full_with_partial_record() { + setup(); + let mut decoder = DbnDecoder::new(); + let buffer = Vec::new(); + let mut encoder = Encoder::new( + buffer, + &MetadataBuilder::new() + .dataset("XNAS.ITCH".to_owned()) + .schema(Some(Schema::Ohlcv1S)) + .stype_in(Some(SType::Native)) + .stype_out(SType::ProductId) + .start(0) + .build(), + ) + .unwrap(); + decoder.write(encoder.get_ref().as_slice()).unwrap(); + let metadata_pos = encoder.get_ref().len() as usize; + assert!(matches!(decoder.decode(), Ok(recs) if recs.len() == 1)); + assert!(decoder.has_decoded_metadata); + let rec1 = ErrorMsg::new(1680708278000000000, "Python"); + let rec2 = OhlcvMsg { + hd: RecordHeader::new::(rtype::OHLCV_1S, 1, 1, 1681228173000000000), + open: 100, + high: 200, + low: 50, + close: 150, + volume: 1000, + }; + encoder.encode_record(&rec1).unwrap(); + let rec1_pos = encoder.get_ref().len() as usize; + encoder.encode_record(&rec2).unwrap(); + assert!(decoder.buffer.get_ref().is_empty()); + // Write first record and part of second + decoder + .write(&encoder.get_ref()[metadata_pos..rec1_pos + 4]) + .unwrap(); + // Read first record + let res1 = decoder.decode(); + assert!(matches!(res1, Ok(recs) if recs.len() == 1)); + // Write rest of second record + decoder.write(&encoder.get_ref()[rec1_pos + 4..]).unwrap(); + let res2 = decoder.decode(); + assert!(matches!(res2, Ok(recs) if recs.len() == 1)); + } + + 
#[test] + fn test_dbn_decoder() { + setup(); + Python::with_gil(|py| { + let path = PyString::new( + py, + concat!( + env!("CARGO_MANIFEST_DIR"), + "/../tests/data/test_data.mbo.dbn" + ), + ); + py_run!( + py, + path, + r#"from databento_dbn import DbnDecoder + +decoder = DbnDecoder() +with open(path, 'rb') as fin: + decoder.write(fin.read()) +records = decoder.decode() +assert len(records) == 3 +metadata, _ = records[0] +for _, ts_out in records[1:]: + if metadata.ts_out: + assert ts_out is not None + else: + assert ts_out is None"# + ) + }); + } + + #[test] + fn test_dbn_decoder_decoding_error() { + setup(); + Python::with_gil(|py| { + py.run( + r#"from databento_dbn import DbnDecoder, Metadata + +metadata = Metadata( + dataset="GLBX.MDP3", + schema="mbo", + start=1, + stype_in="native", + stype_out="product_id", + end=2, + symbols=[], + partial=[], + not_found=[], + mappings=[] +) +metadata_bytes = bytes(metadata) +decoder = DbnDecoder() +decoder.write(metadata_bytes) +decoder.write(bytes([0x04, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])) +try: + records = decoder.decode() + # If this code is called, the test will fail + assert False +except Exception as ex: + assert "Invalid rtype" in str(ex) +"#, + None, + None, + ) + }).unwrap(); + } +} diff --git a/python/src/encode.rs b/python/src/encode.rs new file mode 100644 index 0000000..684ea1a --- /dev/null +++ b/python/src/encode.rs @@ -0,0 +1,332 @@ +use std::{io, num::NonZeroU64}; + +use dbn::{ + encode::{ + dbn::{Encoder as DbnEncoder, MetadataEncoder}, + DbnEncodable, DynWriter, EncodeDbn, + }, + enums::{Compression, Schema}, + python::to_val_err, + record::{ + ImbalanceMsg, InstrumentDefMsg, MboMsg, Mbp10Msg, Mbp1Msg, OhlcvMsg, StatMsg, TbboMsg, + TradeMsg, + }, + Metadata, +}; +use pyo3::{ + exceptions::{PyTypeError, PyValueError}, + prelude::*, + types::PyBytes, + PyClass, +}; + +/// Updates existing fields that have already been written to the given file. 
+#[pyfunction] +pub fn update_encoded_metadata( + _py: Python<'_>, + file: PyFileLike, + start: u64, + end: Option, + limit: Option, +) -> PyResult<()> { + MetadataEncoder::new(file) + .update_encoded( + start, + end.and_then(NonZeroU64::new), + limit.and_then(NonZeroU64::new), + ) + .map_err(to_val_err) +} + +/// Encodes the given data in the DBN encoding and writes it to `file`. +/// +/// `records` is a list of record objects. +/// +/// # Errors +/// This function returns an error if any of the enum arguments cannot be converted to +/// their Rust equivalents. It will also return an error if there's an issue writing +/// the encoded to bytes or an expected field is missing from one of the dicts. +#[pyfunction] +pub fn write_dbn_file( + _py: Python<'_>, + file: PyFileLike, + compression: Compression, + metadata: &Metadata, + records: Vec<&PyAny>, +) -> PyResult<()> { + let writer = DynWriter::new(file, compression).map_err(to_val_err)?; + let encoder = DbnEncoder::new(writer, metadata).map_err(to_val_err)?; + match metadata.schema { + Some(Schema::Mbo) => encode_pyrecs::(encoder, &records), + Some(Schema::Mbp1) => encode_pyrecs::(encoder, &records), + Some(Schema::Mbp10) => encode_pyrecs::(encoder, &records), + Some(Schema::Tbbo) => encode_pyrecs::(encoder, &records), + Some(Schema::Trades) => encode_pyrecs::(encoder, &records), + Some(Schema::Ohlcv1S) + | Some(Schema::Ohlcv1M) + | Some(Schema::Ohlcv1H) + | Some(Schema::Ohlcv1D) => encode_pyrecs::(encoder, &records), + Some(Schema::Definition) => encode_pyrecs::(encoder, &records), + Some(Schema::Imbalance) => encode_pyrecs::(encoder, &records), + Some(Schema::Statistics) => encode_pyrecs::(encoder, &records), + Some(Schema::Status) | None => Err(PyValueError::new_err( + "Unsupported schema type for writing DBN files", + )), + } +} + +fn encode_pyrecs( + mut encoder: DbnEncoder>, + records: &[&PyAny], +) -> PyResult<()> { + encoder + .encode_records( + records + .iter() + .map(|obj| obj.extract()) + 
.collect::>>()? + .iter() + .as_slice(), + ) + .map_err(to_val_err) +} + +/// A Python object that implements the Python file interface. +pub struct PyFileLike { + inner: PyObject, +} + +impl<'source> FromPyObject<'source> for PyFileLike { + fn extract(any: &'source PyAny) -> PyResult { + Python::with_gil(|py| { + let obj: PyObject = any.extract()?; + if obj.getattr(py, "read").is_err() { + return Err(PyTypeError::new_err( + "object is missing a `read()` method".to_owned(), + )); + } + if obj.getattr(py, "write").is_err() { + return Err(PyTypeError::new_err( + "object is missing a `write()` method".to_owned(), + )); + } + if obj.getattr(py, "seek").is_err() { + return Err(PyTypeError::new_err( + "object is missing a `seek()` method".to_owned(), + )); + } + Ok(PyFileLike { inner: obj }) + }) + } +} + +impl io::Write for PyFileLike { + fn write(&mut self, buf: &[u8]) -> Result { + Python::with_gil(|py| { + let bytes = PyBytes::new(py, buf).to_object(py); + let number_bytes_written = self + .inner + .call_method(py, "write", (bytes,), None) + .map_err(py_to_rs_io_err)?; + + number_bytes_written.extract(py).map_err(py_to_rs_io_err) + }) + } + + fn flush(&mut self) -> Result<(), io::Error> { + Python::with_gil(|py| { + self.inner + .call_method(py, "flush", (), None) + .map_err(py_to_rs_io_err)?; + + Ok(()) + }) + } +} + +impl io::Seek for PyFileLike { + fn seek(&mut self, pos: io::SeekFrom) -> Result { + Python::with_gil(|py| { + let (whence, offset) = match pos { + io::SeekFrom::Start(i) => (0, i as i64), + io::SeekFrom::Current(i) => (1, i), + io::SeekFrom::End(i) => (2, i), + }; + + let new_position = self + .inner + .call_method(py, "seek", (offset, whence), None) + .map_err(py_to_rs_io_err)?; + + new_position.extract(py).map_err(py_to_rs_io_err) + }) + } +} + +fn py_to_rs_io_err(e: PyErr) -> io::Error { + Python::with_gil(|py| { + let e_as_object: PyObject = e.into_py(py); + + match e_as_object.call_method(py, "__str__", (), None) { + Ok(repr) => match 
repr.extract::(py) { + Ok(s) => io::Error::new(io::ErrorKind::Other, s), + Err(_e) => io::Error::new(io::ErrorKind::Other, "An unknown error has occurred"), + }, + Err(_) => io::Error::new(io::ErrorKind::Other, "Err doesn't have __str__"), + } + }) +} + +#[cfg(test)] +mod tests { + + use std::io::{Cursor, Seek, Write}; + use std::sync::{Arc, Mutex}; + + use dbn::{ + decode::{dbn::Decoder as DbnDecoder, DecodeDbn}, + enums::SType, + metadata::MetadataBuilder, + record::TbboMsg, + }; + + use super::*; + + const DBN_PATH: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../tests/data"); + + #[pyclass] + struct MockPyFile { + buf: Arc>>>, + } + + #[pymethods] + impl MockPyFile { + fn read(&self) { + unimplemented!(); + } + + fn write(&mut self, bytes: &[u8]) -> usize { + self.buf.lock().unwrap().write_all(bytes).unwrap(); + bytes.len() + } + + fn flush(&mut self) { + self.buf.lock().unwrap().flush().unwrap(); + } + + fn seek(&self, offset: i64, whence: i32) -> u64 { + self.buf + .lock() + .unwrap() + .seek(match whence { + 0 => io::SeekFrom::Start(offset as u64), + 1 => io::SeekFrom::Current(offset), + 2 => io::SeekFrom::End(offset), + _ => unimplemented!("whence value"), + }) + .unwrap() + } + } + + impl MockPyFile { + fn new() -> Self { + Self { + buf: Arc::new(Mutex::new(Cursor::new(Vec::new()))), + } + } + + fn inner(&self) -> Arc>>> { + self.buf.clone() + } + } + + const DATASET: &str = "GLBX.MDP3"; + const STYPE: SType = SType::ProductId; + + macro_rules! 
test_writing_dbn_from_python { + ($test_name:ident, $record_type:ident, $schema:expr) => { + #[test] + fn $test_name() { + // Required one-time setup + pyo3::prepare_freethreaded_python(); + + // Read in test data + let decoder = DbnDecoder::from_zstd_file(format!( + "{DBN_PATH}/test_data.{}.dbn.zst", + $schema.as_str() + )) + .unwrap(); + let rs_recs = decoder.decode_records::<$record_type>().unwrap(); + let output_buf = Python::with_gil(|py| -> PyResult<_> { + // Convert JSON objects to Python `dict`s + let recs: Vec<_> = rs_recs + .iter() + .map(|rs_rec| rs_rec.clone().into_py(py)) + .collect(); + let mock_file = MockPyFile::new(); + let output_buf = mock_file.inner(); + let mock_file = Py::new(py, mock_file).unwrap().into_py(py); + let metadata = MetadataBuilder::new() + .dataset(DATASET.to_owned()) + .schema(Some($schema)) + .start(0) + .stype_in(Some(STYPE)) + .stype_out(STYPE) + .build(); + // Call target function + write_dbn_file( + py, + mock_file.extract(py).unwrap(), + Compression::ZStd, + &metadata, + recs.iter().map(|r| r.as_ref(py)).collect(), + ) + .unwrap(); + + Ok(output_buf.clone()) + }) + .unwrap(); + let output_buf = output_buf.lock().unwrap().clone().into_inner(); + + assert!(!output_buf.is_empty()); + + dbg!(&output_buf); + dbg!(output_buf.len()); + // Reread output written with `write_dbn_file` and compare to original + // contents + let py_decoder = DbnDecoder::with_zstd(Cursor::new(&output_buf)).unwrap(); + let metadata = py_decoder.metadata().clone(); + assert_eq!(metadata.schema, Some($schema)); + assert_eq!(metadata.dataset, DATASET); + assert_eq!(metadata.stype_in, Some(STYPE)); + assert_eq!(metadata.stype_out, STYPE); + let decoder = DbnDecoder::from_zstd_file(format!( + "{DBN_PATH}/test_data.{}.dbn.zst", + $schema.as_str() + )) + .unwrap(); + + let py_recs = py_decoder.decode_records::<$record_type>().unwrap(); + let exp_recs = decoder.decode_records::<$record_type>().unwrap(); + assert_eq!(py_recs.len(), exp_recs.len()); + for 
(py_rec, exp_rec) in py_recs.iter().zip(exp_recs.iter()) { + assert_eq!(py_rec, exp_rec); + } + assert_eq!( + py_recs.len(), + if $schema == Schema::Ohlcv1D { 0 } else { 2 } + ); + } + }; + } + + test_writing_dbn_from_python!(test_writing_mbo_from_python, MboMsg, Schema::Mbo); + test_writing_dbn_from_python!(test_writing_mbp1_from_python, Mbp1Msg, Schema::Mbp1); + test_writing_dbn_from_python!(test_writing_mbp10_from_python, Mbp10Msg, Schema::Mbp10); + test_writing_dbn_from_python!(test_writing_ohlcv1d_from_python, OhlcvMsg, Schema::Ohlcv1D); + test_writing_dbn_from_python!(test_writing_ohlcv1h_from_python, OhlcvMsg, Schema::Ohlcv1H); + test_writing_dbn_from_python!(test_writing_ohlcv1m_from_python, OhlcvMsg, Schema::Ohlcv1M); + test_writing_dbn_from_python!(test_writing_ohlcv1s_from_python, OhlcvMsg, Schema::Ohlcv1S); + test_writing_dbn_from_python!(test_writing_tbbo_from_python, TbboMsg, Schema::Tbbo); + test_writing_dbn_from_python!(test_writing_trades_from_python, TradeMsg, Schema::Trades); +} diff --git a/python/src/lib.rs b/python/src/lib.rs index 8eca6c3..f6075ba 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -1,18 +1,18 @@ //! Python bindings for the [`dbn`] crate using [`pyo3`]. 
-use std::io::{self, Write}; -use pyo3::{prelude::*, types::PyTuple, wrap_pyfunction, PyClass}; +use pyo3::{prelude::*, wrap_pyfunction, PyClass}; use dbn::{ - decode::dbn::{MetadataDecoder, RecordDecoder}, - python::to_val_err, record::{ - BidAskPair, ErrorMsg, HasRType, ImbalanceMsg, InstrumentDefMsg, MboMsg, Mbp10Msg, Mbp1Msg, - OhlcvMsg, RecordHeader, StatMsg, StatusMsg, SymbolMappingMsg, SystemMsg, TradeMsg, + BidAskPair, ErrorMsg, ImbalanceMsg, InstrumentDefMsg, MboMsg, Mbp10Msg, Mbp1Msg, OhlcvMsg, + RecordHeader, StatMsg, StatusMsg, SymbolMappingMsg, SystemMsg, TradeMsg, }, - rtype_ts_out_dispatch, Metadata, + Metadata, }; +mod dbn_decoder; +mod encode; + /// A Python module wrapping dbn functions #[pymodule] // The name of the function must match `lib.name` in `Cargo.toml` fn databento_dbn(_py: Python<'_>, m: &PyModule) -> PyResult<()> { @@ -22,9 +22,9 @@ fn databento_dbn(_py: Python<'_>, m: &PyModule) -> PyResult<()> { m.add_class::() } // all functions exposed to Python need to be added here - m.add_wrapped(wrap_pyfunction!(dbn::python::update_encoded_metadata))?; - m.add_wrapped(wrap_pyfunction!(dbn::python::write_dbn_file))?; - checked_add_class::(m)?; + m.add_wrapped(wrap_pyfunction!(encode::update_encoded_metadata))?; + m.add_wrapped(wrap_pyfunction!(encode::write_dbn_file))?; + checked_add_class::(m)?; checked_add_class::(m)?; checked_add_class::(m)?; checked_add_class::(m)?; @@ -43,133 +43,13 @@ fn databento_dbn(_py: Python<'_>, m: &PyModule) -> PyResult<()> { Ok(()) } -#[pyclass(module = "databento_dbn")] -struct DbnDecoder { - buffer: io::Cursor>, - has_decoded_metadata: bool, - ts_out: bool, -} - -#[pymethods] -impl DbnDecoder { - #[new] - fn new() -> Self { - Self { - buffer: io::Cursor::default(), - has_decoded_metadata: false, - ts_out: false, - } - } - - fn write(&mut self, bytes: &[u8]) -> PyResult<()> { - self.buffer.write_all(bytes).map_err(to_val_err) - } - - fn buffer(&self) -> &[u8] { - self.buffer.get_ref().as_slice() - } - - fn 
decode(&mut self) -> PyResult> { - let mut recs = Vec::new(); - let orig_position = self.buffer.position(); - self.buffer.set_position(0); - if !self.has_decoded_metadata { - match MetadataDecoder::new(&mut self.buffer).decode() { - Ok(metadata) => { - self.ts_out = metadata.ts_out; - Python::with_gil(|py| recs.push((metadata, py.None()).into_py(py))); - self.has_decoded_metadata = true; - } - Err(err) => { - self.buffer.set_position(orig_position); - // haven't read enough data for metadata - return Err(to_val_err(err)); - } - } - } - let mut read_position = self.buffer.position() as usize; - let mut decoder = RecordDecoder::new(&mut self.buffer); - Python::with_gil(|py| -> PyResult<()> { - while let Some(rec) = decoder.decode_ref().map_err(to_val_err)? { - // Bug in clippy generates an error here. trivial_copy feature isn't enabled, - // but clippy thinks these records are `Copy` - fn push_rec>>( - rec: &R, - py: Python, - recs: &mut Vec>, - ) { - let pyrec = rec.clone().into_py(py); - recs.push( - // Convert non `WithTsOut` records to a (rec, None) - // for consistent typing - if pyrec - .as_ref(py) - .is_instance_of::() - .unwrap_or_default() - { - pyrec - } else { - (pyrec, py.None()).into_py(py) - }, - ) - } - - // Safety: It's safe to cast to `WithTsOut` because we're passing in the `ts_out` - // from the metadata header. - if unsafe { rtype_ts_out_dispatch!(rec, self.ts_out, push_rec, py, &mut recs) } - .is_err() - { - return Err(to_val_err(format!( - "Invalid rtype {} found in record", - rec.header().rtype, - ))); - } - // keep track of position after last _successful_ decoding to ensure - // buffer is left in correct state in the case where one or more - // successful decodings is followed by a partial one, i.e. 
`decode_ref` - // returning `Ok(None)` - read_position = decoder.get_mut().position() as usize; - } - Ok(()) - }) - .map_err(|e| { - self.buffer.set_position(orig_position); - e - })?; - if recs.is_empty() { - self.buffer.set_position(orig_position); - } else { - self.shift_buffer(read_position); - } - Ok(recs) - } -} - -impl DbnDecoder { - fn shift_buffer(&mut self, read_position: usize) { - let inner_buf = self.buffer.get_mut(); - let length = inner_buf.len(); - let new_length = length - read_position; - inner_buf.drain(..read_position); - debug_assert_eq!(inner_buf.len(), new_length); - self.buffer.set_position(new_length as u64); - } -} - #[cfg(test)] mod tests { - use dbn::{encode::EncodeDbn, enums::rtype::OHLCV_1S}; - use pyo3::{py_run, types::PyString}; - - use ::dbn::{ - encode::dbn::Encoder, - enums::{SType, Schema}, - MetadataBuilder, - }; + use dbn::enums::SType; use super::*; - fn setup() { + pub fn setup() { if unsafe { pyo3::ffi::Py_IsInitialized() } == 0 { // add to available modules pyo3::append_to_inittab!(databento_dbn); @@ -178,122 +58,6 @@ mod tests { pyo3::prepare_freethreaded_python(); } - #[test] - fn test_partial_records() { - setup(); - let mut decoder = DbnDecoder::new(); - let buffer = Vec::new(); - let mut encoder = Encoder::new( - buffer, - &MetadataBuilder::new() - .dataset("XNAS.ITCH".to_owned()) - .schema(Some(Schema::Trades)) - .stype_in(Some(SType::Native)) - .stype_out(SType::ProductId) - .start(0) - .build(), - ) - .unwrap(); - decoder.write(encoder.get_ref().as_slice()).unwrap(); - let metadata_pos = encoder.get_ref().len() as usize; - assert!(matches!(decoder.decode(), Ok(recs) if recs.len() == 1)); - assert!(decoder.has_decoded_metadata); - let rec = ErrorMsg::new(1680708278000000000, "Python"); - encoder.encode_record(&rec).unwrap(); - assert!(decoder.buffer.get_ref().is_empty()); - let record_pos = encoder.get_ref().len() as usize; - for i in metadata_pos..record_pos { - decoder.write(&encoder.get_ref()[i..i + 1]).unwrap(); 
- assert_eq!(decoder.buffer.get_ref().len(), i + 1 - metadata_pos); - // wrote last byte - if i == record_pos - 1 { - let res = decoder.decode(); - assert_eq!(record_pos - metadata_pos, std::mem::size_of_val(&rec)); - assert!(matches!(res, Ok(recs) if recs.len() == 1)); - } else { - let res = decoder.decode(); - assert!(matches!(res, Ok(recs) if recs.is_empty())); - } - } - } - - #[test] - fn test_full_with_partial_record() { - setup(); - let mut decoder = DbnDecoder::new(); - let buffer = Vec::new(); - let mut encoder = Encoder::new( - buffer, - &MetadataBuilder::new() - .dataset("XNAS.ITCH".to_owned()) - .schema(Some(Schema::Ohlcv1S)) - .stype_in(Some(SType::Native)) - .stype_out(SType::ProductId) - .start(0) - .build(), - ) - .unwrap(); - decoder.write(encoder.get_ref().as_slice()).unwrap(); - let metadata_pos = encoder.get_ref().len() as usize; - assert!(matches!(decoder.decode(), Ok(recs) if recs.len() == 1)); - assert!(decoder.has_decoded_metadata); - let rec1 = ErrorMsg::new(1680708278000000000, "Python"); - let rec2 = OhlcvMsg { - hd: RecordHeader::new::(OHLCV_1S, 1, 1, 1681228173000000000), - open: 100, - high: 200, - low: 50, - close: 150, - volume: 1000, - }; - encoder.encode_record(&rec1).unwrap(); - let rec1_pos = encoder.get_ref().len() as usize; - encoder.encode_record(&rec2).unwrap(); - assert!(decoder.buffer.get_ref().is_empty()); - // Write first record and part of second - decoder - .write(&encoder.get_ref()[metadata_pos..rec1_pos + 4]) - .unwrap(); - // Read first record - let res1 = decoder.decode(); - assert!(matches!(res1, Ok(recs) if recs.len() == 1)); - // Write rest of second record - decoder.write(&encoder.get_ref()[rec1_pos + 4..]).unwrap(); - let res2 = decoder.decode(); - assert!(matches!(res2, Ok(recs) if recs.len() == 1)); - } - - #[test] - fn test_dbn_decoder() { - setup(); - Python::with_gil(|py| { - let path = PyString::new( - py, - concat!( - env!("CARGO_MANIFEST_DIR"), - "/../tests/data/test_data.mbo.dbn" - ), - ); - py_run!( - 
py, - path, - r#"from databento_dbn import DbnDecoder - -decoder = DbnDecoder() -with open(path, 'rb') as fin: - decoder.write(fin.read()) -records = decoder.decode() -assert len(records) == 3 -metadata, _ = records[0] -for _, ts_out in records[1:]: - if metadata.ts_out: - assert ts_out is not None - else: - assert ts_out is None"# - ) - }); - } - #[test] fn test_metadata_identity() { // initialize interpreter @@ -353,40 +117,4 @@ except Exception: }) .unwrap(); } - - #[test] - fn test_dbn_decoder_decoding_error() { - setup(); - Python::with_gil(|py| { - py.run( - r#"from databento_dbn import DbnDecoder, Metadata - -metadata = Metadata( - dataset="GLBX.MDP3", - schema="mbo", - start=1, - stype_in="native", - stype_out="product_id", - end=2, - symbols=[], - partial=[], - not_found=[], - mappings=[] -) -metadata_bytes = bytes(metadata) -decoder = DbnDecoder() -decoder.write(metadata_bytes) -decoder.write(bytes([0x04, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00])) -try: - records = decoder.decode() - # If this code is called, the test will fail - assert False -except Exception as ex: - assert "Invalid rtype" in str(ex) -"#, - None, - None, - ) - }).unwrap(); - } } diff --git a/rust/dbn/src/python.rs b/rust/dbn/src/python.rs index 34be6d4..cc54036 100644 --- a/rust/dbn/src/python.rs +++ b/rust/dbn/src/python.rs @@ -1,23 +1,19 @@ //! Python wrappers around dbn functions. These are implemented here instead of in `python/` -//! to be able to implement [`pyo3`] traits for [`dbn`] types. +//! to be able to implement [`pyo3`] traits for DBN types. 
#![allow(clippy::too_many_arguments)] -use std::{collections::HashMap, ffi::c_char, fmt, io, io::SeekFrom, num::NonZeroU64}; +use std::{collections::HashMap, ffi::c_char, fmt, io, num::NonZeroU64}; use pyo3::{ - exceptions::{PyTypeError, PyValueError}, + exceptions::PyValueError, prelude::*, types::{PyBytes, PyDate, PyDateAccess, PyDict, PyTuple}, - PyClass, }; use time::Date; use crate::{ decode::{DecodeDbn, DynDecoder}, - encode::{ - dbn::{self, MetadataEncoder}, - DbnEncodable, DynWriter, EncodeDbn, - }, + encode::dbn::MetadataEncoder, enums::{ rtype, Compression, SType, Schema, SecurityUpdateAction, StatUpdateAction, UserDefinedInstrument, @@ -26,112 +22,12 @@ use crate::{ record::{ str_to_c_chars, BidAskPair, ErrorMsg, HasRType, ImbalanceMsg, InstrumentDefMsg, MboMsg, Mbp10Msg, Mbp1Msg, OhlcvMsg, RecordHeader, StatMsg, StatusMsg, SymbolMappingMsg, SystemMsg, - TbboMsg, TradeMsg, WithTsOut, + TradeMsg, WithTsOut, }, UNDEF_ORDER_SIZE, UNDEF_PRICE, }; use crate::{MappingInterval, Metadata, SymbolMapping}; -/// Updates existing fields that have already been written to the given file. -#[pyfunction] -pub fn update_encoded_metadata( - _py: Python<'_>, - file: PyFileLike, - start: u64, - end: Option, - limit: Option, -) -> PyResult<()> { - MetadataEncoder::new(file) - .update_encoded( - start, - end.and_then(NonZeroU64::new), - limit.and_then(NonZeroU64::new), - ) - .map_err(to_val_err) -} - -/// A Python object that implements the Python file interface. -pub struct PyFileLike { - inner: PyObject, -} - -/// Encodes the given data in the DBN encoding and writes it to `file`. -/// -/// `records` is a list of record objects. -/// -/// # Errors -/// This function returns an error if any of the enum arguments cannot be converted to -/// their Rust equivalents. It will also return an error if there's an issue writing -/// the encoded to bytes or an expected field is missing from one of the dicts. 
-#[pyfunction] -pub fn write_dbn_file( - _py: Python<'_>, - file: PyFileLike, - compression: Compression, - metadata: &Metadata, - records: Vec<&PyAny>, -) -> PyResult<()> { - let writer = DynWriter::new(file, compression).map_err(to_val_err)?; - let encoder = dbn::Encoder::new(writer, metadata).map_err(to_val_err)?; - match metadata.schema { - Some(Schema::Mbo) => encode_pyrecs::(encoder, &records), - Some(Schema::Mbp1) => encode_pyrecs::(encoder, &records), - Some(Schema::Mbp10) => encode_pyrecs::(encoder, &records), - Some(Schema::Tbbo) => encode_pyrecs::(encoder, &records), - Some(Schema::Trades) => encode_pyrecs::(encoder, &records), - Some(Schema::Ohlcv1S) - | Some(Schema::Ohlcv1M) - | Some(Schema::Ohlcv1H) - | Some(Schema::Ohlcv1D) => encode_pyrecs::(encoder, &records), - Some(Schema::Definition) => encode_pyrecs::(encoder, &records), - Some(Schema::Imbalance) => encode_pyrecs::(encoder, &records), - Some(Schema::Statistics) => encode_pyrecs::(encoder, &records), - Some(Schema::Status) | None => Err(PyValueError::new_err( - "Unsupported schema type for writing DBN files", - )), - } -} - -fn encode_pyrecs( - mut encoder: dbn::Encoder>, - records: &[&PyAny], -) -> PyResult<()> { - encoder - .encode_records( - records - .iter() - .map(|obj| obj.extract()) - .collect::>>()? 
- .iter() - .as_slice(), - ) - .map_err(to_val_err) -} - -impl<'source> FromPyObject<'source> for PyFileLike { - fn extract(any: &'source PyAny) -> PyResult { - Python::with_gil(|py| { - let obj: PyObject = any.extract()?; - if obj.getattr(py, "read").is_err() { - return Err(PyTypeError::new_err( - "object is missing a `read()` method".to_owned(), - )); - } - if obj.getattr(py, "write").is_err() { - return Err(PyTypeError::new_err( - "object is missing a `write()` method".to_owned(), - )); - } - if obj.getattr(py, "seek").is_err() { - return Err(PyTypeError::new_err( - "object is missing a `seek()` method".to_owned(), - )); - } - Ok(PyFileLike { inner: obj }) - }) - } -} - #[pymethods] impl Metadata { #[new] @@ -299,63 +195,6 @@ pub fn to_val_err(e: impl fmt::Debug) -> PyErr { PyValueError::new_err(format!("{e:?}")) } -fn py_to_rs_io_err(e: PyErr) -> io::Error { - Python::with_gil(|py| { - let e_as_object: PyObject = e.into_py(py); - - match e_as_object.call_method(py, "__str__", (), None) { - Ok(repr) => match repr.extract::(py) { - Ok(s) => io::Error::new(io::ErrorKind::Other, s), - Err(_e) => io::Error::new(io::ErrorKind::Other, "An unknown error has occurred"), - }, - Err(_) => io::Error::new(io::ErrorKind::Other, "Err doesn't have __str__"), - } - }) -} - -impl io::Write for PyFileLike { - fn write(&mut self, buf: &[u8]) -> Result { - Python::with_gil(|py| { - let bytes = PyBytes::new(py, buf).to_object(py); - let number_bytes_written = self - .inner - .call_method(py, "write", (bytes,), None) - .map_err(py_to_rs_io_err)?; - - number_bytes_written.extract(py).map_err(py_to_rs_io_err) - }) - } - - fn flush(&mut self) -> Result<(), io::Error> { - Python::with_gil(|py| { - self.inner - .call_method(py, "flush", (), None) - .map_err(py_to_rs_io_err)?; - - Ok(()) - }) - } -} - -impl io::Seek for PyFileLike { - fn seek(&mut self, pos: SeekFrom) -> Result { - Python::with_gil(|py| { - let (whence, offset) = match pos { - SeekFrom::Start(i) => (0, i as i64), - 
SeekFrom::Current(i) => (1, i), - SeekFrom::End(i) => (2, i), - }; - - let new_position = self - .inner - .call_method(py, "seek", (offset, whence), None) - .map_err(py_to_rs_io_err)?; - - new_position.extract(py).map_err(py_to_rs_io_err) - }) - } -} - impl<'source> FromPyObject<'source> for Compression { fn extract(any: &'source PyAny) -> PyResult { let str: &str = any.extract()?; @@ -1156,154 +995,3 @@ impl SystemMsg { self.msg().map_err(to_val_err) } } - -#[cfg(test)] -mod tests { - use std::io::{Cursor, Seek, Write}; - use std::sync::{Arc, Mutex}; - - use streaming_iterator::StreamingIterator; - - use super::*; - use crate::decode::{dbn, DecodeDbn}; - use crate::metadata::MetadataBuilder; - - const DBN_PATH: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/../../tests/data"); - - #[pyclass] - struct MockPyFile { - buf: Arc>>>, - } - - #[pymethods] - impl MockPyFile { - fn read(&self) { - unimplemented!(); - } - - fn write(&mut self, bytes: &[u8]) -> usize { - self.buf.lock().unwrap().write_all(bytes).unwrap(); - bytes.len() - } - - fn flush(&mut self) { - self.buf.lock().unwrap().flush().unwrap(); - } - - fn seek(&self, offset: i64, whence: i32) -> u64 { - self.buf - .lock() - .unwrap() - .seek(match whence { - 0 => SeekFrom::Start(offset as u64), - 1 => SeekFrom::Current(offset), - 2 => SeekFrom::End(offset), - _ => unimplemented!("whence value"), - }) - .unwrap() - } - } - - impl MockPyFile { - fn new() -> Self { - Self { - buf: Arc::new(Mutex::new(Cursor::new(Vec::new()))), - } - } - - fn inner(&self) -> Arc>>> { - self.buf.clone() - } - } - - const DATASET: &str = "GLBX.MDP3"; - const STYPE: SType = SType::ProductId; - - macro_rules! 
test_writing_dbn_from_python { - ($test_name:ident, $record_type:ident, $schema:expr) => { - #[test] - fn $test_name() { - // Required one-time setup - pyo3::prepare_freethreaded_python(); - - // Read in test data - let decoder = dbn::Decoder::from_zstd_file(format!( - "{DBN_PATH}/test_data.{}.dbn.zst", - $schema.as_str() - )) - .unwrap(); - let rs_recs = decoder.decode_records::<$record_type>().unwrap(); - let output_buf = Python::with_gil(|py| -> PyResult<_> { - // Convert JSON objects to Python `dict`s - let recs: Vec<_> = rs_recs - .iter() - .map(|rs_rec| rs_rec.clone().into_py(py)) - .collect(); - let mock_file = MockPyFile::new(); - let output_buf = mock_file.inner(); - let mock_file = Py::new(py, mock_file).unwrap().into_py(py); - let metadata = MetadataBuilder::new() - .dataset(DATASET.to_owned()) - .schema(Some($schema)) - .start(0) - .stype_in(Some(STYPE)) - .stype_out(STYPE) - .build(); - // Call target function - write_dbn_file( - py, - mock_file.extract(py).unwrap(), - Compression::ZStd, - &metadata, - recs.iter().map(|r| r.as_ref(py)).collect(), - ) - .unwrap(); - - Ok(output_buf.clone()) - }) - .unwrap(); - let output_buf = output_buf.lock().unwrap().clone().into_inner(); - - assert!(!output_buf.is_empty()); - - dbg!(&output_buf); - dbg!(output_buf.len()); - // Reread output written with `write_dbn_file` and compare to original - // contents - let py_decoder = dbn::Decoder::with_zstd(Cursor::new(&output_buf)).unwrap(); - let metadata = py_decoder.metadata().clone(); - assert_eq!(metadata.schema, Some($schema)); - assert_eq!(metadata.dataset, DATASET); - assert_eq!(metadata.stype_in, Some(STYPE)); - assert_eq!(metadata.stype_out, STYPE); - let decoder = dbn::Decoder::from_zstd_file(format!( - "{DBN_PATH}/test_data.{}.dbn.zst", - $schema.as_str() - )) - .unwrap(); - - let mut py_iter = py_decoder.decode_stream::<$record_type>().unwrap(); - let mut expected_iter = decoder.decode_stream::<$record_type>().unwrap(); - let mut count = 0; - while let 
Some((py_rec, exp_rec)) = py_iter - .next() - .and_then(|py_rec| expected_iter.next().map(|exp_rec| (py_rec, exp_rec))) - { - assert_eq!(py_rec, exp_rec); - count += 1; - } - assert_eq!(count, if $schema == Schema::Ohlcv1D { 0 } else { 2 }); - } - }; - } - - test_writing_dbn_from_python!(test_writing_mbo_from_python, MboMsg, Schema::Mbo); - test_writing_dbn_from_python!(test_writing_mbp1_from_python, Mbp1Msg, Schema::Mbp1); - test_writing_dbn_from_python!(test_writing_mbp10_from_python, Mbp10Msg, Schema::Mbp10); - test_writing_dbn_from_python!(test_writing_ohlcv1d_from_python, OhlcvMsg, Schema::Ohlcv1D); - test_writing_dbn_from_python!(test_writing_ohlcv1h_from_python, OhlcvMsg, Schema::Ohlcv1H); - test_writing_dbn_from_python!(test_writing_ohlcv1m_from_python, OhlcvMsg, Schema::Ohlcv1M); - test_writing_dbn_from_python!(test_writing_ohlcv1s_from_python, OhlcvMsg, Schema::Ohlcv1S); - test_writing_dbn_from_python!(test_writing_tbbo_from_python, TbboMsg, Schema::Tbbo); - test_writing_dbn_from_python!(test_writing_trades_from_python, TradeMsg, Schema::Trades); -} From 41866742e225423e5ff5f9624dd3ba4d1e976e14 Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Thu, 13 Apr 2023 13:23:52 -0700 Subject: [PATCH 10/22] ADD: Add richcmp for Python DBN classes --- CHANGELOG.md | 1 + python/databento_dbn.pyi | 132 ++++++++++++++------------------------- rust/dbn/src/python.rs | 113 +++++++++++++++++++++++++++++++-- 3 files changed, 158 insertions(+), 88 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5229e8c..6788016 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - Fixed value associated with `Side::None` - Fixed issue with decoding partial records in Python `DbnDecoder` - Fixed missing type hint for Metadata bytes support +- Added support for equality comparisons in Python classes ## 0.4.3 - 2023-04-07 - Fixed typo in Python type stubs diff --git a/python/databento_dbn.pyi b/python/databento_dbn.pyi index 0f85722..7bde989 100644 --- 
a/python/databento_dbn.pyi +++ b/python/databento_dbn.pyi @@ -38,6 +38,7 @@ class Metadata(SupportsBytes): """ def __bytes__(self) -> bytes: ... + def __richcmp__(self) -> bool: ... @property def version(self) -> int: """ @@ -133,34 +134,34 @@ class Metadata(SupportsBytes): """ @property - def symbols(self) -> Sequence[str]: + def symbols(self) -> List[str]: """ The original query input symbols from the request. Returns ------- - Sequence[str] + List[str] """ @property - def partial(self) -> Sequence[str]: + def partial(self) -> List[str]: """ Symbols that did not resolve for at least one day in the query time range. Returns ------- - str + List[str] """ @property - def not_found(self) -> Sequence[str]: + def not_found(self) -> List[str]: """ Symbols that did not resolve for any day in the query time range. Returns ------- - Sequence[str] + List[str] """ @property @@ -173,6 +174,41 @@ class Metadata(SupportsBytes): Dict[str, List[Dict[str, Any]]]: """ + @classmethod + def decode(cls, data: bytes) -> "Metadata": + """ + Decodes the given Python `bytes` to `Metadata`. Returns a `Metadata` + object with all the DBN metadata attributes. + + Parameters + ---------- + data : bytes + The bytes to decode from. + + Returns + ------- + Metadata + + Raises + ------ + ValueError + When a Metadata instance cannot be parsed from `data`. + + """ + def encode(self) -> bytes: + """ + Encodes the Metadata to bytes. + + Returns + ------- + bytes + + Raises + ------ + ValueError + When the Metadata object cannot be encoded. + + """ class RecordHeader: """DBN Record Header.""" @@ -233,6 +269,7 @@ class Record(SupportsBytes): """Base class for DBN records.""" def __bytes__(self) -> bytes: ... + def __richcmp__(self) -> bool: ... 
@property def hd(self) -> RecordHeader: """ @@ -536,13 +573,13 @@ class MBP1Msg(Record, _MBPBase): """Market by price implementation with a known book depth of 1.""" @property - def booklevel(self) -> Sequence[BidAskPair]: + def booklevel(self) -> List[BidAskPair]: """ The top of the order book. Returns ------- - Sequence[BidAskPair] + List[BidAskPair] Notes ----- @@ -554,13 +591,13 @@ class MBP10Msg(Record, _MBPBase): """Market by price implementation with a known book depth of 10.""" @property - def booklevel(self) -> Sequence[BidAskPair]: + def booklevel(self) -> List[BidAskPair]: """ The top of the order book. Returns ------- - Sequence[BidAskPair] + List[BidAskPair] Notes ----- @@ -1550,10 +1587,9 @@ class SystemMsg(Record): class DbnDecoder: """A class for decoding DBN data to Python objects.""" - @property def buffer(self) -> bytes: """ - The internal buffer. + Return the internal buffer of the decoder. Returns ------- @@ -1598,78 +1634,6 @@ class DbnDecoder: """ -def decode_metadata(bytes: bytes) -> Metadata: - """ - Decodes the given Python `bytes` to `Metadata`. Returns a `Metadata` object - with all the DBN metadata attributes. - - Parameters - ---------- - bytes - - Raises - ------ - ValueError - When the metadata cannot be parsed from `bytes`. - - """ - -def encode_metadata( - dataset: str, - schema: str, - start: int, - stype_in: str, - stype_out: str, - symbols: Sequence[str], - partial: Sequence[str], - not_found: Sequence[str], - mappings: Sequence[object], - end: Optional[int] = None, - limit: Optional[int] = None, -) -> bytes: - """ - Encodes the given metadata into the DBN metadata binary format. Returns - Python `bytes`. - - Parameters - ---------- - dataset : str - The dataset code. - schema : str - The data record schema. - start : int - The UNIX nanosecond timestamp of the query start, or the first record - if the file was split. - stype_in : str - The input symbology type to map from. - stype_out: str - The output symbology type to map to. 
- symbols : Sequence[str] - The original query input symbols from the request. - partial : Sequence[str] - Symbols that did not resolve for _at least one day_ in the query time range. - not_found : Sequence[str] - Symbols that did not resolve for _any_ day in the query time range. - mappings : Sequence[Dict[str, Any]] - Symbol mappings containing a native symbol and its mapping intervals. - end : Optional[int] - The UNIX nanosecond timestamp of the query end, or the last record - if the file was split. - limit : Optional[int] - The optional maximum number of records for the query. - - Returns - ------- - bytes - - Raises - ------ - ValueError - When any of the arguments cannot be converted to their Rust equivalents. - When there's an issue writing the encoded metadata to bytes. - - """ - def update_encoded_metadata( file: BinaryIO, start: int, diff --git a/rust/dbn/src/python.rs b/rust/dbn/src/python.rs index cc54036..28049c9 100644 --- a/rust/dbn/src/python.rs +++ b/rust/dbn/src/python.rs @@ -7,7 +7,8 @@ use std::{collections::HashMap, ffi::c_char, fmt, io, num::NonZeroU64}; use pyo3::{ exceptions::PyValueError, prelude::*, - types::{PyBytes, PyDate, PyDateAccess, PyDict, PyTuple}, + pyclass::CompareOp, + types::{PyBytes, PyDate, PyDateAccess, PyDict, PyTuple, PyType}, }; use time::Date; @@ -61,6 +62,14 @@ impl Metadata { .build() } + fn __richcmp__(&self, other: &Metadata, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + fn __repr__(&self) -> String { format!("{self:?}") } @@ -80,9 +89,9 @@ impl Metadata { } #[pyo3(name = "decode")] - #[staticmethod] - fn py_decode(bytes: &PyBytes) -> PyResult { - let reader = io::BufReader::new(bytes.as_bytes()); + #[classmethod] + fn py_decode(_cls: &PyType, data: &PyBytes) -> PyResult { + let reader = io::BufReader::new(data.as_bytes()); Ok(DynDecoder::inferred_with_buffer(reader) 
.map_err(to_val_err)? .metadata() @@ -285,6 +294,14 @@ impl MboMsg { } } + fn __richcmp__(&self, other: &MboMsg, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + fn __repr__(&self) -> String { format!("{self:?}") } @@ -356,6 +373,14 @@ impl TradeMsg { } } + fn __richcmp__(&self, other: &TradeMsg, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + fn __repr__(&self) -> String { format!("{self:?}") } @@ -403,6 +428,14 @@ impl Mbp1Msg { } } + fn __richcmp__(&self, other: &Mbp1Msg, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + fn __repr__(&self) -> String { format!("{self:?}") } @@ -462,6 +495,14 @@ impl Mbp10Msg { }) } + fn __richcmp__(&self, other: &Mbp10Msg, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + fn __repr__(&self) -> String { format!("{self:?}") } @@ -500,6 +541,14 @@ impl OhlcvMsg { } } + fn __richcmp__(&self, other: &OhlcvMsg, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + fn __repr__(&self) -> String { format!("{self:?}") } @@ -537,6 +586,14 @@ impl StatusMsg { }) } + fn __richcmp__(&self, other: &StatusMsg, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + fn __repr__(&self) -> String { format!("{self:?}") } @@ -704,6 +761,14 @@ impl InstrumentDefMsg { }) 
} + fn __richcmp__(&self, other: &InstrumentDefMsg, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + fn __repr__(&self) -> String { format!("{self:?}") } @@ -827,6 +892,14 @@ impl ImbalanceMsg { } } + fn __richcmp__(&self, other: &ImbalanceMsg, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + fn __repr__(&self) -> String { format!("{self:?}") } @@ -875,6 +948,14 @@ impl StatMsg { } } + fn __richcmp__(&self, other: &StatMsg, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + fn __repr__(&self) -> String { format!("{self:?}") } @@ -896,6 +977,14 @@ impl ErrorMsg { Ok(ErrorMsg::new(ts_event, err)) } + fn __richcmp__(&self, other: &ErrorMsg, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + fn __repr__(&self) -> String { format!("{self:?}") } @@ -943,6 +1032,14 @@ impl SymbolMappingMsg { }) } + fn __richcmp__(&self, other: &SymbolMappingMsg, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + fn __repr__(&self) -> String { format!("{self:?}") } @@ -976,6 +1073,14 @@ impl SystemMsg { SystemMsg::new(ts_event, msg).map_err(to_val_err) } + fn __richcmp__(&self, other: &SystemMsg, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + fn __repr__(&self) -> String { format!("{self:?}") } From 
1115bd6a9ad3ae0bbbed1b3180c1f2ef7d5370e9 Mon Sep 17 00:00:00 2001 From: Carter Green Date: Mon, 17 Apr 2023 11:17:37 -0500 Subject: [PATCH 11/22] ADD: Add retries to DBN test --- scripts/test.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/test.sh b/scripts/test.sh index 962eede..888a323 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -1,2 +1,6 @@ #! /usr/bin/env bash -cargo test --features async,python +for i in $(seq 1 3); do + if cargo test --features async,python; then + break + fi +done From 60b8ad725603c06b2ee8d0034c650b8672524d72 Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Tue, 18 Apr 2023 07:42:39 -0700 Subject: [PATCH 12/22] MOD: Replace richcmp with eq and ne in stype stub --- python/databento_dbn.pyi | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/databento_dbn.pyi b/python/databento_dbn.pyi index 7bde989..7af69ef 100644 --- a/python/databento_dbn.pyi +++ b/python/databento_dbn.pyi @@ -38,7 +38,8 @@ class Metadata(SupportsBytes): """ def __bytes__(self) -> bytes: ... - def __richcmp__(self) -> bool: ... + def __eq__(self, other) -> bool: ... + def __ne__(self, other) -> bool: ... @property def version(self) -> int: """ @@ -269,7 +270,8 @@ class Record(SupportsBytes): """Base class for DBN records.""" def __bytes__(self) -> bytes: ... - def __richcmp__(self) -> bool: ... + def __eq__(self, other) -> bool: ... + def __ne__(self, other) -> bool: ... 
@property def hd(self) -> RecordHeader: """ From 14c362a058639aa15a12931078b817a0f7859039 Mon Sep 17 00:00:00 2001 From: Carter Green Date: Mon, 17 Apr 2023 13:53:42 -0500 Subject: [PATCH 13/22] ADD: Add is_heartbeat to Python DBN --- python/databento_dbn.pyi | 11 +++++++++++ rust/dbn/src/python.rs | 6 ++++++ 2 files changed, 17 insertions(+) diff --git a/python/databento_dbn.pyi b/python/databento_dbn.pyi index 7af69ef..99245ab 100644 --- a/python/databento_dbn.pyi +++ b/python/databento_dbn.pyi @@ -1584,6 +1584,17 @@ class SystemMsg(Record): ------- str + """ + @property + def is_heartbeat(self) -> bool: + """ + `true` if this message is a heartbeat, used to indicate the connection with the gateway + is still open. + + Returns + ------- + bool + """ class DbnDecoder: diff --git a/rust/dbn/src/python.rs b/rust/dbn/src/python.rs index 28049c9..48e8e28 100644 --- a/rust/dbn/src/python.rs +++ b/rust/dbn/src/python.rs @@ -1099,4 +1099,10 @@ impl SystemMsg { fn py_msg(&self) -> PyResult<&str> { self.msg().map_err(to_val_err) } + + #[getter] + #[pyo3(name = "is_heartbeat")] + fn py_is_heartbeat(&self) -> bool { + self.is_heartbeat() + } } From d0c121d74d4e4d266c0c683147a327179d3ed510 Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Tue, 18 Apr 2023 12:19:50 -0700 Subject: [PATCH 14/22] FIX: Return type stubs for Metadata fields --- python/databento_dbn.pyi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/databento_dbn.pyi b/python/databento_dbn.pyi index 99245ab..2b90da1 100644 --- a/python/databento_dbn.pyi +++ b/python/databento_dbn.pyi @@ -61,14 +61,14 @@ class Metadata(SupportsBytes): """ @property - def schema(self) -> int: + def schema(self) -> Optional[str]: """ The data record schema. Specifies which record type is stored in the Zstd-compressed DBN file. 
Returns ------- - int + Optional[str] """ @property @@ -104,23 +104,23 @@ class Metadata(SupportsBytes): """ @property - def stype_in(self) -> int: + def stype_in(self) -> Optional[str]: """ The input symbology type to map from. Returns ------- - int + Optional[str] """ @property - def stype_out(self) -> int: + def stype_out(self) -> str: """ The output symbology type to map to. Returns ------- - int + str """ @property From 63d825f96ee85f2852f96fe66c8ae61a1c55118e Mon Sep 17 00:00:00 2001 From: Carter Green Date: Wed, 19 Apr 2023 08:48:55 -0500 Subject: [PATCH 15/22] MOD: Upgrade Rust dependencies --- Cargo.lock | 322 +++++++++++------------- c/Cargo.toml | 2 +- python/Cargo.toml | 4 +- rust/dbn-cli/Cargo.toml | 10 +- rust/dbn-cli/tests/integration_tests.rs | 2 +- rust/dbn/Cargo.toml | 10 +- 6 files changed, 163 insertions(+), 187 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 33821eb..8031388 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,11 +11,54 @@ dependencies = [ "memchr", ] +[[package]] +name = "anstream" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e579a7752471abc2a8268df8b20005e3eadd975f585398f17efcfd8d4927371" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is-terminal", + "utf8parse", +] + [[package]] name = "anstyle" -version = "0.3.5" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d" + +[[package]] +name = "anstyle-parse" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e765fd216e48e067936442276d1d57399e37bce53c264d6fefbe298080cb57ee" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23ea9e81bd02e310c216d080f6223c179012256e5151c41db88d12c88a1684d2" 
+checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "anstyle-wincon" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bcd8291a340dd8ac70e18878bc4501dd7b4ff970cfa21c207d36ece51ea88fd" +dependencies = [ + "anstyle", + "windows-sys 0.48.0", +] [[package]] name = "anyhow" @@ -25,14 +68,14 @@ checksum = "7de8ce5e0f9f8d88245311066a578d72b7af3e7088f32783804676302df237e4" [[package]] name = "assert_cmd" -version = "2.0.10" +version = "2.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0b2340f55d9661d76793b2bfc2eb0e62689bd79d067a95707ea762afd5e9dd" +checksum = "86d6b683edf8d1119fe420a94f8a7e389239666aa72e65495d91c00462510151" dependencies = [ "anstyle", "bstr", "doc-comment", - "predicates 3.0.2", + "predicates", "predicates-core", "predicates-tree", "wait-timeout", @@ -48,19 +91,8 @@ dependencies = [ "memchr", "pin-project-lite", "tokio", - "zstd", - "zstd-safe", -] - -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi 0.1.19", - "libc", - "winapi", + "zstd 0.11.2+zstd.1.5.2", + "zstd-safe 5.0.2+zstd.1.5.2", ] [[package]] @@ -128,42 +160,51 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "3.2.23" +version = "4.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5" +checksum = "49f9152d70e42172fdb87de2efd7327160beee37886027cf86f30a233d5b30b4" dependencies = [ - "atty", - "bitflags", + "clap_builder", "clap_derive", - "clap_lex", - "indexmap", "once_cell", +] + +[[package]] +name = "clap_builder" +version = "4.2.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e067b220911598876eb55d52725ddcc201ffe3f0904018195973bc5b012ea2ca" +dependencies = [ + "anstream", + "anstyle", + "bitflags", + "clap_lex", "strsim", - "termcolor", - "textwrap", ] [[package]] name = "clap_derive" -version = "3.2.18" +version = "4.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65" +checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" dependencies = [ "heck", - "proc-macro-error", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.15", ] [[package]] name = "clap_lex" -version = "0.2.4" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" -dependencies = [ - "os_str_bytes", -] +checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" [[package]] name = "csv" @@ -210,7 +251,7 @@ dependencies = [ "streaming-iterator", "time", "tokio", - "zstd", + "zstd 0.12.3+zstd.1.5.2", ] [[package]] @@ -230,7 +271,7 @@ dependencies = [ "assert_cmd", "clap", "dbn", - "predicates 2.1.5", + "predicates", "serde", "tempfile", ] @@ -259,13 +300,13 @@ checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" [[package]] name = "errno" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d6a0976c999d473fe89ad888d5a284e55366d9dc9038b1ba2aa15128c4afa0" +checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" dependencies = [ "errno-dragonfly", "libc", - "windows-sys 0.45.0", + "windows-sys 0.48.0", ] [[package]] @@ -314,15 +355,6 @@ version = "0.4.1" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - [[package]] name = "hermit-abi" version = "0.2.6" @@ -374,6 +406,18 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "is-terminal" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" +dependencies = [ + "hermit-abi 0.3.1", + "io-lifetimes", + "rustix", + "windows-sys 0.48.0", +] + [[package]] name = "itertools" version = "0.10.5" @@ -406,9 +450,9 @@ checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" [[package]] name = "linux-raw-sys" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" +checksum = "3f508063cc7bb32987c71511216bd5a32be15bccb6a80b52df8b9d7f01fc3aa2" [[package]] name = "lock_api" @@ -471,23 +515,23 @@ dependencies = [ [[package]] name = "num_enum" -version = "0.5.11" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f646caf906c20226733ed5b1374287eb97e3c2a5c227ce668c1f2ce20ae57c9" +checksum = "7a015b430d3c108a207fd776d2e2196aaf8b1cf8cf93253e3a097ff3085076a1" dependencies = [ "num_enum_derive", ] [[package]] name = "num_enum_derive" -version = "0.5.11" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799" +checksum = "96667db765a921f7b295ffee8b60472b686a51d4f21c2ee4ffdb94c7013b65a6" dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 
1.0.109", + "syn 2.0.15", ] [[package]] @@ -496,12 +540,6 @@ version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" -[[package]] -name = "os_str_bytes" -version = "6.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ceedf44fb00f2d1984b0bc98102627ce622e083e49a5bacdb3e514fa4238e267" - [[package]] name = "parking_lot" version = "0.12.1" @@ -539,10 +577,11 @@ checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" [[package]] name = "predicates" -version = "2.1.5" +version = "3.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59230a63c37f3e18569bdb90e4a89cbf5bf8b06fea0b84e65ea10cc4df47addd" +checksum = "09963355b9f467184c04017ced4a2ba2d75cbcb4e7462690d388233253d4b1a9" dependencies = [ + "anstyle", "difflib", "float-cmp", "itertools", @@ -551,18 +590,6 @@ dependencies = [ "regex", ] -[[package]] -name = "predicates" -version = "3.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c575290b64d24745b6c57a12a31465f0a66f3a4799686a6921526a33b0797965" -dependencies = [ - "anstyle", - "difflib", - "itertools", - "predicates-core", -] - [[package]] name = "predicates-core" version = "1.0.6" @@ -589,30 +616,6 @@ dependencies = [ "toml_edit", ] -[[package]] -name = "proc-macro-error" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" -dependencies = [ - "proc-macro-error-attr", - "proc-macro2", - "quote", - "syn 1.0.109", - "version_check", -] - -[[package]] -name = "proc-macro-error-attr" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" -dependencies = [ - "proc-macro2", - "quote", - "version_check", -] - [[package]] name = 
"proc-macro2" version = "1.0.56" @@ -624,9 +627,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.18.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfb848f80438f926a9ebddf0a539ed6065434fd7aae03a89312a9821f81b8501" +checksum = "e3b1ac5b3731ba34fdaa9785f8d74d17448cd18f30cf19e0c7e7b1fdb5272109" dependencies = [ "cfg-if", "indoc", @@ -641,9 +644,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.18.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98a42e7f42e917ce6664c832d5eee481ad514c98250c49e0b03b20593e2c7ed0" +checksum = "9cb946f5ac61bb61a5014924910d936ebd2b23b705f7a4a3c40b05c720b079a3" dependencies = [ "once_cell", "target-lexicon", @@ -651,9 +654,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.18.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0707f0ab26826fe4ccd59b69106e9df5e12d097457c7b8f9c0fd1d2743eec4d" +checksum = "fd4d7c5337821916ea2a1d21d1092e8443cf34879e53a0ac653fbb98f44ff65c" dependencies = [ "libc", "pyo3-build-config", @@ -661,9 +664,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.18.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "978d18e61465ecd389e1f235ff5a467146dc4e3c3968b90d274fe73a5dd4a438" +checksum = "a9d39c55dab3fc5a4b25bbd1ac10a2da452c4aca13bb450f22818a002e29648d" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -673,9 +676,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.18.2" +version = "0.18.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e0e1128f85ce3fca66e435e08aa2089a2689c1c48ce97803e13f63124058462" +checksum = "97daff08a4c48320587b5224cc98d609e3c27b6d437315bd40b605c98eeb5918" dependencies = [ "proc-macro2", "quote", @@ -734,16 +737,16 @@ checksum = 
"f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "rustix" -version = "0.37.7" +version = "0.37.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2aae838e49b3d63e9274e1c01833cc8139d3fec468c3b84688c628f44b1ae11d" +checksum = "722529a737f5a942fdbac3a46cee213053196737c5eaa3386d52e85b786f2659" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", "linux-raw-sys", - "windows-sys 0.45.0", + "windows-sys 0.48.0", ] [[package]] @@ -760,29 +763,29 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "serde" -version = "1.0.159" +version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c04e8343c3daeec41f58990b9d77068df31209f2af111e059e9fe9646693065" +checksum = "bb2f3770c8bce3bcda7e149193a069a0f4365bda1fa5cd88e03bca26afc1216c" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.159" +version = "1.0.160" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c614d17805b093df4b147b51339e7e44bf05ef59fba1e45d83500bcfb4d8585" +checksum = "291a097c63d8497e00160b166a967a4a79c64f3facdd01cbd7502231688d77df" dependencies = [ "proc-macro2", "quote", - "syn 2.0.13", + "syn 2.0.15", ] [[package]] name = "serde_json" -version = "1.0.95" +version = "1.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d721eca97ac802aa7777b701877c8004d950fc142651367300d21c1cc0194744" +checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1" dependencies = [ "itoa", "ryu", @@ -820,9 +823,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.13" +version = "2.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c9da457c5285ac1f936ebd076af6dac17a61cfe7826f2076b4d015cf47bc8ec" +checksum = "a34fcf3e8b60f57e6a14301a2e916d323af98b0ea63c599441eec8558660c822" dependencies = [ "proc-macro2", "quote", @@ 
-848,27 +851,12 @@ dependencies = [ "windows-sys 0.45.0", ] -[[package]] -name = "termcolor" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" -dependencies = [ - "winapi-util", -] - [[package]] name = "termtree" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" -[[package]] -name = "textwrap" -version = "0.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" - [[package]] name = "time" version = "0.3.20" @@ -917,7 +905,7 @@ checksum = "61a573bdc87985e9d6ddeed1b3d864e8a302c847e40d647746df2f1de209d1ce" dependencies = [ "proc-macro2", "quote", - "syn 2.0.13", + "syn 2.0.15", ] [[package]] @@ -959,10 +947,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" [[package]] -name = "version_check" -version = "0.9.4" +name = "utf8parse" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "wait-timeout" @@ -973,37 +961,6 @@ dependencies = [ "libc", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name 
= "winapi-util" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" -dependencies = [ - "winapi", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - [[package]] name = "windows-sys" version = "0.45.0" @@ -1151,7 +1108,16 @@ version = "0.11.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" dependencies = [ - "zstd-safe", + "zstd-safe 5.0.2+zstd.1.5.2", +] + +[[package]] +name = "zstd" +version = "0.12.3+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" +dependencies = [ + "zstd-safe 6.0.5+zstd.1.5.4", ] [[package]] @@ -1164,6 +1130,16 @@ dependencies = [ "zstd-sys", ] +[[package]] +name = "zstd-safe" +version = "6.0.5+zstd.1.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d56d9e60b4b1758206c238a10165fbcae3ca37b01744e394c463463f6529d23b" +dependencies = [ + "libc", + "zstd-sys", +] + [[package]] name = "zstd-sys" version = "2.0.8+zstd.1.5.5" diff --git a/c/Cargo.toml b/c/Cargo.toml index 30ed0d5..4c2ce34 100644 --- a/c/Cargo.toml +++ b/c/Cargo.toml @@ -16,7 +16,7 @@ crate-type = ["cdylib"] [dependencies] # DBN library dbn = { path = "../rust/dbn", features = [] } -libc = "0.2.140" +libc = "0.2.141" [build-dependencies] cbindgen = { version = "0.24.3", default_features = false } diff --git a/python/Cargo.toml b/python/Cargo.toml index 6d05dfc..6cb6241 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -16,7 +16,7 @@ name = "databento_dbn" # Python modules can't contain dashes # DBN library dbn = { path = "../rust/dbn", features = 
["python"] } # Python bindings for Rust -pyo3 = { version = "0.18.2", features = [] } +pyo3 = { version = "0.18.3", features = [] } [build-dependencies] -pyo3-build-config = { version = "0.18.2" } +pyo3-build-config = { version = "0.18.3" } diff --git a/rust/dbn-cli/Cargo.toml b/rust/dbn-cli/Cargo.toml index fccd5d6..72a55b1 100644 --- a/rust/dbn-cli/Cargo.toml +++ b/rust/dbn-cli/Cargo.toml @@ -20,16 +20,16 @@ path = "src/main.rs" dbn = { path = "../dbn", version = "=0.4.3" } # Error handling -anyhow = "1.0.68" +anyhow = "1.0.70" # CLI argument parsing -clap = { version = "3.2", features = ["derive"] } +clap = { version = "4.2", features = ["derive"] } # deserialization for CLI args serde = { version = "1.0", features = ["derive"] } [dev-dependencies] # CLI integration tests -assert_cmd = "2.0.7" +assert_cmd = "2.0.11" # assert_cmd companion -predicates = "2.1.5" +predicates = "3.0.3" # A library for managing temporary files and directories -tempfile = "3.3.0" +tempfile = "3.5.0" diff --git a/rust/dbn-cli/tests/integration_tests.rs b/rust/dbn-cli/tests/integration_tests.rs index 7761391..d412d79 100644 --- a/rust/dbn-cli/tests/integration_tests.rs +++ b/rust/dbn-cli/tests/integration_tests.rs @@ -325,7 +325,7 @@ fn help() { .arg("--help") .assert() .success() - .stdout(contains("USAGE:")); + .stdout(contains("Usage:")); } #[test] diff --git a/rust/dbn/Cargo.toml b/rust/dbn/Cargo.toml index feb1986..dfa43c5 100644 --- a/rust/dbn/Cargo.toml +++ b/rust/dbn/Cargo.toml @@ -27,9 +27,9 @@ async-compression = { version = "0.3.15", features = ["tokio", "zstd"], optional # CSV serialization csv = "1.2" # Deriving translation between integers and enums -num_enum = "0.5" +num_enum = "0.6" # Python bindings for Rust -pyo3 = { version = "0.18.2", optional = true } +pyo3 = { version = "0.18.3", optional = true } # Derialization serde = { version = "1.0", features = ["derive"] } # JSON serialization @@ -39,9 +39,9 @@ streaming-iterator = "0.1.9" # date and datetime support time = 
{ version = "0.3", features = ["serde"] } # async traits -tokio = { version = "1.25", features = ["io-util"], optional = true } +tokio = { version = "1.27", features = ["io-util"], optional = true } # (de)compression -zstd = "=0.11.2" +zstd = "=0.12.3" [dev-dependencies] -tokio = { version = "1.25", features = ["fs", "io-util", "macros", "rt-multi-thread"] } +tokio = { version = "1.27", features = ["fs", "io-util", "macros", "rt-multi-thread"] } From fc0dcb4d2e6879daf1b899cbd4ed789ab016a923 Mon Sep 17 00:00:00 2001 From: Carter Green Date: Thu, 20 Apr 2023 11:34:21 -0500 Subject: [PATCH 16/22] ADD: Add IndicativeOpeningPrice to DBN --- rust/dbn/src/enums.rs | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/rust/dbn/src/enums.rs b/rust/dbn/src/enums.rs index 308f7ac..bd3cab9 100644 --- a/rust/dbn/src/enums.rs +++ b/rust/dbn/src/enums.rs @@ -582,30 +582,33 @@ impl Serialize for SecurityUpdateAction { pub enum StatType { /// The price of the first trade of an instrument. `price` will be set. OpeningPrice = 1, + /// The probable price of the first trade of an instrument published during pre- + /// open. Both `price` and `quantity` will be set. + IndicativeOpeningPrice = 2, /// The settlement price of an instrument. `price` will be set and `flags` indicate /// whether the price is final or preliminary and actual or theoretical. - SettlementPrice = 2, + SettlementPrice = 3, /// The lowest trade price of an instrument during the trading session. `price` will /// be set. - TradingSessionLowPrice = 3, + TradingSessionLowPrice = 4, /// The highest trade price of an instrument during the trading session. `price` will /// be set. - TradingSessionHighPrice = 4, + TradingSessionHighPrice = 5, /// The number of contracts cleared for an instrument on the previous trading date. /// `quantity` will be set. - ClearedVolume = 5, + ClearedVolume = 6, /// The lowest offer price for an instrument during the trading session. `price` /// will be set. 
- LowestOffer = 6, + LowestOffer = 7, /// The highest bid price for an instrument during the trading session. `price` /// will be set. - HighestBid = 7, + HighestBid = 8, /// The current number of outstanding contracts of an instrument. `quantity` will // be set. - OpenInterest = 8, + OpenInterest = 9, /// The volume-weighted average price (VWAP) for a fixing period. `price` will be /// set. - FixingPrice = 9, + FixingPrice = 10, } /// The type of [`StatMsg`](crate::record::StatMsg) update. From a085f4ff5df2ee17a6bdcf60f210926af102965e Mon Sep 17 00:00:00 2001 From: Carter Green Date: Mon, 17 Apr 2023 14:18:24 -0500 Subject: [PATCH 17/22] MOD: Rename STypes in DBN --- CHANGELOG.md | 5 ++++ c/src/lib.rs | 4 +-- python/README.md | 4 +-- python/databento_dbn.pyi | 6 ++-- python/src/dbn_decoder.rs | 10 +++---- python/src/encode.rs | 2 +- python/src/lib.rs | 12 ++++---- rust/dbn/src/decode/dbn.rs | 12 ++++---- rust/dbn/src/decode/dbz.rs | 6 ++-- rust/dbn/src/encode/csv.rs | 32 ++++++++++----------- rust/dbn/src/encode/dbn.rs | 36 ++++++++++++------------ rust/dbn/src/encode/json.rs | 16 +++++------ rust/dbn/src/encode/mod.rs | 2 +- rust/dbn/src/enums.rs | 27 ++++++++++++++---- rust/dbn/src/metadata.rs | 8 +++--- rust/dbn/src/python.rs | 56 ++++++++++++++++++------------------- rust/dbn/src/record.rs | 26 ++++++++--------- 17 files changed, 142 insertions(+), 122 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6788016..99bf208 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ ## 0.5.0 - TBD - Added support for Statistics schema - Changed `schema` and `stype_in` to optional in `Metadata` to support live data +- Renamed `SType::ProductId` to `SType::InstrumentId` and `SType::Native` to `SType::RawSymbol` +- Renamed `RecordHeader::product_id` to `instrument_id` +- Renamed `InstrumentDefMsg::symbol` to `raw_symbol` +- Renamed `SymbolMapping::native_symbol` to `raw_symbol` +- Deprecated `SType::Smart` to split into `SType::Parent` and 
`SType::Continuous` - Added `RType` enum for exhaustive pattern matching - Added `&str` getters for more `c_char` array record fields - Changed `DbnDecoder.decode` to always return a list of tuples diff --git a/c/src/lib.rs b/c/src/lib.rs index 5577a13..6f907a2 100644 --- a/c/src/lib.rs +++ b/c/src/lib.rs @@ -44,8 +44,8 @@ pub unsafe extern "C" fn encode_metadata( let metadata = MetadataBuilder::new() .dataset(dataset) .start(start) - .stype_in(Some(SType::ProductId)) - .stype_out(SType::ProductId) + .stype_in(Some(SType::InstrumentId)) + .stype_out(SType::InstrumentId) .schema(Some(schema)) .build(); let buffer: &mut [u8] = slice::from_raw_parts_mut(buffer as *mut u8, length); diff --git a/python/README.md b/python/README.md index 2604c65..f2e1a5a 100644 --- a/python/README.md +++ b/python/README.md @@ -33,13 +33,13 @@ You can write Zstd-compressed DBN files using `write_dbn_file`: from databento_dbn import write_dbn_file records = [ - {"rtype": 160, "publisher_id": 1, "product_id": 1, "ts_event": 647784973705, "order_id": 1, + {"rtype": 160, "publisher_id": 1, "instrument_id": 1, "ts_event": 647784973705, "order_id": 1, "price": 3723000000000, "size": 1, "flags": 128, "channel_id": 0, "action": ord('C'), "side": ord('A'), "ts_recv": 1609160400000704060, "ts_in_delta": 0, "sequence": 1170352} ] with open("my.dbn.zst", "wb") as out: write_dbn_file(file=out, compression="zstd", schema="mbo", dataset="custom", - records=records, stype="product_id") + records=records, stype="instrument_id") ``` Note that the keys in the dictionaries in `records` must match the field names of the schema, or the function will raise a `KeyError`. diff --git a/python/databento_dbn.pyi b/python/databento_dbn.pyi index 2b90da1..a90a63c 100644 --- a/python/databento_dbn.pyi +++ b/python/databento_dbn.pyi @@ -245,7 +245,7 @@ class RecordHeader: """ @property - def product_id(self) -> int: + def instrument_id(self) -> int: """ The numeric product ID assigned to the instrument. 
@@ -808,7 +808,7 @@ class InstrumentDefMsg(Record): @property def underlying_id(self) -> int: """ - The `product_id` of the first underlying instrument. + The `instrument_id` of the first underlying instrument. Returns ------- @@ -1032,7 +1032,7 @@ class InstrumentDefMsg(Record): """ @property - def symbol(self) -> str: + def raw_symbol(self) -> str: """ The instrument name (symbol). diff --git a/python/src/dbn_decoder.rs b/python/src/dbn_decoder.rs index 8acfe7e..1edfcb9 100644 --- a/python/src/dbn_decoder.rs +++ b/python/src/dbn_decoder.rs @@ -145,8 +145,8 @@ mod tests { &MetadataBuilder::new() .dataset("XNAS.ITCH".to_owned()) .schema(Some(Schema::Trades)) - .stype_in(Some(SType::Native)) - .stype_out(SType::ProductId) + .stype_in(Some(SType::RawSymbol)) + .stype_out(SType::InstrumentId) .start(0) .build(), ) @@ -184,8 +184,8 @@ mod tests { &MetadataBuilder::new() .dataset("XNAS.ITCH".to_owned()) .schema(Some(Schema::Ohlcv1S)) - .stype_in(Some(SType::Native)) - .stype_out(SType::ProductId) + .stype_in(Some(SType::RawSymbol)) + .stype_out(SType::InstrumentId) .start(0) .build(), ) @@ -263,7 +263,7 @@ metadata = Metadata( schema="mbo", start=1, stype_in="native", - stype_out="product_id", + stype_out="instrument_id", end=2, symbols=[], partial=[], diff --git a/python/src/encode.rs b/python/src/encode.rs index 684ea1a..b606911 100644 --- a/python/src/encode.rs +++ b/python/src/encode.rs @@ -241,7 +241,7 @@ mod tests { } const DATASET: &str = "GLBX.MDP3"; - const STYPE: SType = SType::ProductId; + const STYPE: SType = SType::InstrumentId; macro_rules! 
test_writing_dbn_from_python { ($test_name:ident, $record_type:ident, $schema:expr) => { diff --git a/python/src/lib.rs b/python/src/lib.rs index f6075ba..3785bf1 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -62,8 +62,8 @@ mod tests { fn test_metadata_identity() { // initialize interpreter setup(); - let stype_in = SType::Native as u8; - let stype_out = SType::ProductId as u8; + let stype_in = SType::RawSymbol as u8; + let stype_out = SType::InstrumentId as u8; Python::with_gil(|py| { pyo3::py_run!( py, @@ -74,8 +74,8 @@ metadata = Metadata( dataset="GLBX.MDP3", schema="mbo", start=1, - stype_in="native", - stype_out="product_id", + stype_in="raw_symbol", + stype_out="instrument_id", end=2, symbols=[], partial=[], @@ -89,8 +89,8 @@ assert metadata.schema == "mbo" assert metadata.start == 1 assert metadata.end == 2 assert metadata.limit is None -assert metadata.stype_in == "native" -assert metadata.stype_out == "product_id" +assert metadata.stype_in == "raw_symbol" +assert metadata.stype_out == "instrument_id" assert metadata.ts_out is False"# ); }); diff --git a/rust/dbn/src/decode/dbn.rs b/rust/dbn/src/decode/dbn.rs index 44c286d..a2d076a 100644 --- a/rust/dbn/src/decode/dbn.rs +++ b/rust/dbn/src/decode/dbn.rs @@ -426,8 +426,8 @@ where "Unexpected end of metadata buffer while parsing symbol mapping" )); } - let native_symbol = - Self::decode_symbol(buffer, pos).with_context(|| "Couldn't parse native symbol")?; + let raw_symbol = + Self::decode_symbol(buffer, pos).with_context(|| "Couldn't parse raw symbol")?; let interval_count = u32::from_le_slice(&buffer[*pos..]) as usize; *pos += Self::U32_SIZE; let read_size = interval_count * MAPPING_INTERVAL_ENCODED_LEN; @@ -460,7 +460,7 @@ where }); } Ok(SymbolMapping { - native_symbol, + raw_symbol, intervals, }) } @@ -640,8 +640,8 @@ mod tests { .dataset("XNAS.ITCH".to_owned()) .schema(Some(Schema::Mbo)) .start(0) - .stype_in(Some(SType::ProductId)) - .stype_out(SType::ProductId) + 
.stype_in(Some(SType::InstrumentId)) + .stype_out(SType::InstrumentId) .build(), ) .unwrap(); @@ -1021,7 +1021,7 @@ mod r#async { ts_out: 1678486110, }; let mut rec2 = rec1.clone(); - rec2.rec.hd.product_id += 1; + rec2.rec.hd.instrument_id += 1; rec2.ts_out = 1678486827; let mut buffer = Vec::new(); let mut encoder = AsyncRecordEncoder::new(&mut buffer); diff --git a/rust/dbn/src/decode/dbz.rs b/rust/dbn/src/decode/dbz.rs index 7702292..9b8eca8 100644 --- a/rust/dbn/src/decode/dbz.rs +++ b/rust/dbn/src/decode/dbz.rs @@ -314,8 +314,8 @@ impl MetadataDecoder { "Unexpected end of metadata buffer while parsing symbol mapping" )); } - let native_symbol = - Self::decode_symbol(buffer, pos).with_context(|| "Couldn't parse native symbol")?; + let raw_symbol = + Self::decode_symbol(buffer, pos).with_context(|| "Couldn't parse raw symbol")?; let interval_count = u32::from_le_slice(&buffer[*pos..]) as usize; *pos += Self::U32_SIZE; let read_size = interval_count * MAPPING_INTERVAL_ENCODED_SIZE; @@ -348,7 +348,7 @@ impl MetadataDecoder { }); } Ok(SymbolMapping { - native_symbol, + raw_symbol, intervals, }) } diff --git a/rust/dbn/src/encode/csv.rs b/rust/dbn/src/encode/csv.rs index a649522..f0a672c 100644 --- a/rust/dbn/src/encode/csv.rs +++ b/rust/dbn/src/encode/csv.rs @@ -169,7 +169,7 @@ pub(crate) mod serialize { [ "rtype", "publisher_id", - "product_id", + "instrument_id", "ts_event", "order_id", "price", @@ -192,7 +192,7 @@ pub(crate) mod serialize { [ "rtype", "publisher_id", - "product_id", + "instrument_id", "ts_event", "price", "size", @@ -220,7 +220,7 @@ pub(crate) mod serialize { [ "rtype", "publisher_id", - "product_id", + "instrument_id", "ts_event", "price", "size", @@ -299,7 +299,7 @@ pub(crate) mod serialize { fn serialize_to(&self, csv_writer: &mut Writer) -> csv::Result<()> { csv_writer.write_field(self.hd.rtype.to_string())?; csv_writer.write_field(self.hd.publisher_id.to_string())?; - csv_writer.write_field(self.hd.product_id.to_string())?; + 
csv_writer.write_field(self.hd.instrument_id.to_string())?; csv_writer.write_field(self.hd.ts_event.to_string())?; csv_writer.write_field(self.price.to_string())?; csv_writer.write_field(self.size.to_string())?; @@ -329,7 +329,7 @@ pub(crate) mod serialize { [ "rtype", "publisher_id", - "product_id", + "instrument_id", "ts_event", "price", "size", @@ -351,7 +351,7 @@ pub(crate) mod serialize { [ "rtype", "publisher_id", - "product_id", + "instrument_id", "ts_event", "open", "high", @@ -369,7 +369,7 @@ pub(crate) mod serialize { [ "rtype", "publisher_id", - "product_id", + "instrument_id", "ts_event", "ts_recv", "group", @@ -387,7 +387,7 @@ pub(crate) mod serialize { [ "rtype", "publisher_id", - "product_id", + "instrument_id", "ts_event", "ts_recv", "min_price_increment", @@ -424,7 +424,7 @@ pub(crate) mod serialize { "currency", "settl_currency", "secsubtype", - "symbol", + "raw_symbol", "group", "exchange", "asset", @@ -461,7 +461,7 @@ pub(crate) mod serialize { [ "rtype", "publisher_id", - "product_id", + "instrument_id", "ts_event", "ts_recv", "ref_price", @@ -494,7 +494,7 @@ pub(crate) mod serialize { [ "rtype", "publisher_id", - "product_id", + "instrument_id", "ts_event", "ts_recv", "ts_ref", @@ -514,7 +514,7 @@ pub(crate) mod serialize { impl CsvSerialize for ErrorMsg { fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()> { - ["rtype", "publisher_id", "product_id", "ts_event", "err"] + ["rtype", "publisher_id", "instrument_id", "ts_event", "err"] .iter() .try_for_each(|header| csv_writer.write_field(header)) } @@ -522,7 +522,7 @@ pub(crate) mod serialize { impl CsvSerialize for SystemMsg { fn serialize_header(csv_writer: &mut Writer) -> csv::Result<()> { - ["rtype", "publisher_id", "product_id", "ts_event", "msg"] + ["rtype", "publisher_id", "instrument_id", "ts_event", "msg"] .iter() .try_for_each(|header| csv_writer.write_field(header)) } @@ -533,7 +533,7 @@ pub(crate) mod serialize { [ "rtype", "publisher_id", - "product_id", + 
"instrument_id", "ts_event", "stype_in_symbol", "stype_out_symbol", @@ -782,7 +782,7 @@ mod tests { currency: [0; 4], settl_currency: str_to_c_chars("USD").unwrap(), secsubtype: [0; 6], - symbol: [0; 22], + raw_symbol: [0; 22], group: [0; 21], exchange: [0; 5], asset: [0; 7], @@ -846,7 +846,7 @@ mod tests { let lines = String::from_utf8(buffer).expect("valid UTF-8"); assert_eq!( lines, - format!("ts_out,rtype,publisher_id,product_id,ts_event,price,size,action,side,flags,depth,ts_recv,ts_in_delta,sequence\n1678480044000000000,{HEADER_CSV},5500,3,T,A,128,9,1658441891000000000,22000,1002375\n") + format!("ts_out,rtype,publisher_id,instrument_id,ts_event,price,size,action,side,flags,depth,ts_recv,ts_in_delta,sequence\n1678480044000000000,{HEADER_CSV},5500,3,T,A,128,9,1658441891000000000,22000,1002375\n") ); } diff --git a/rust/dbn/src/encode/dbn.rs b/rust/dbn/src/encode/dbn.rs index 8e47d53..2f37210 100644 --- a/rust/dbn/src/encode/dbn.rs +++ b/rust/dbn/src/encode/dbn.rs @@ -209,7 +209,7 @@ where } fn encode_symbol_mapping(&mut self, symbol_mapping: &SymbolMapping) -> anyhow::Result<()> { - self.encode_fixed_len_cstr::<{ crate::SYMBOL_CSTR_LEN }>(&symbol_mapping.native_symbol)?; + self.encode_fixed_len_cstr::<{ crate::SYMBOL_CSTR_LEN }>(&symbol_mapping.raw_symbol)?; // encode interval_count self.writer.write_all( (symbol_mapping.intervals.len() as u32) @@ -304,8 +304,8 @@ mod tests { version: crate::DBN_VERSION, dataset: "GLBX.MDP3".to_owned(), schema: Some(Schema::Mbp10), - stype_in: Some(SType::Native), - stype_out: SType::ProductId, + stype_in: Some(SType::RawSymbol), + stype_out: SType::InstrumentId, start: 1657230820000000000, end: NonZeroU64::new(1658960170000000000), limit: None, @@ -315,7 +315,7 @@ mod tests { not_found: vec!["QQQQQ".to_owned()], mappings: vec![ SymbolMapping { - native_symbol: "ES.0".to_owned(), + raw_symbol: "ES.0".to_owned(), intervals: vec![MappingInterval { start_date: time::Date::from_calendar_date(2022, time::Month::July, 26) .unwrap(), 
@@ -325,7 +325,7 @@ mod tests { }], }, SymbolMapping { - native_symbol: "NG.0".to_owned(), + raw_symbol: "NG.0".to_owned(), intervals: vec![ MappingInterval { start_date: time::Date::from_calendar_date(2022, time::Month::July, 26) @@ -421,8 +421,8 @@ mod tests { version: crate::DBN_VERSION, dataset: "GLBX.MDP3".to_owned(), schema: Some(Schema::Mbo), - stype_in: Some(SType::Smart), - stype_out: SType::Native, + stype_in: Some(SType::Parent), + stype_out: SType::RawSymbol, start: 1657230820000000000, end: NonZeroU64::new(1658960170000000000), limit: None, @@ -466,8 +466,8 @@ mod tests { .dataset("XNAS.ITCH".to_owned()) .schema(Some(Schema::Mbo)) .start(1697240529000000000) - .stype_in(Some(SType::Native)) - .stype_out(SType::ProductId) + .stype_in(Some(SType::RawSymbol)) + .stype_out(SType::InstrumentId) .build(); assert!(metadata.end.is_none()); assert!(metadata.limit.is_none()); @@ -484,8 +484,8 @@ mod tests { .dataset("XNAS.ITCH".to_owned()) .schema(Some(Schema::Mbo)) .start(1697240529000000000) - .stype_in(Some(SType::Native)) - .stype_out(SType::ProductId) + .stype_in(Some(SType::RawSymbol)) + .stype_out(SType::InstrumentId) .build(); let calc_length = MetadataEncoder::>::calc_length(&metadata); let mut buffer = Vec::new(); @@ -709,7 +709,7 @@ mod r#async { &mut self, symbol_mapping: &SymbolMapping, ) -> anyhow::Result<()> { - self.encode_fixed_len_cstr::<{ crate::SYMBOL_CSTR_LEN }>(&symbol_mapping.native_symbol) + self.encode_fixed_len_cstr::<{ crate::SYMBOL_CSTR_LEN }>(&symbol_mapping.raw_symbol) .await?; // encode interval_count self.writer @@ -788,8 +788,8 @@ mod r#async { version: crate::DBN_VERSION, dataset: "GLBX.MDP3".to_owned(), schema: Some(Schema::Mbp10), - stype_in: Some(SType::Native), - stype_out: SType::ProductId, + stype_in: Some(SType::RawSymbol), + stype_out: SType::InstrumentId, start: 1657230820000000000, end: NonZeroU64::new(1658960170000000000), limit: None, @@ -799,7 +799,7 @@ mod r#async { not_found: vec!["QQQQQ".to_owned()], mappings: 
vec![ SymbolMapping { - native_symbol: "ES.0".to_owned(), + raw_symbol: "ES.0".to_owned(), intervals: vec![MappingInterval { start_date: time::Date::from_calendar_date(2022, time::Month::July, 26) .unwrap(), @@ -813,7 +813,7 @@ mod r#async { }], }, SymbolMapping { - native_symbol: "NG.0".to_owned(), + raw_symbol: "NG.0".to_owned(), intervals: vec![ MappingInterval { start_date: time::Date::from_calendar_date( @@ -919,8 +919,8 @@ mod r#async { .dataset("XNAS.ITCH".to_owned()) .schema(Some(Schema::Mbo)) .start(1697240529000000000) - .stype_in(Some(SType::Native)) - .stype_out(SType::ProductId) + .stype_in(Some(SType::RawSymbol)) + .stype_out(SType::InstrumentId) .build(); assert!(metadata.end.is_none()); assert!(metadata.limit.is_none()); diff --git a/rust/dbn/src/encode/json.rs b/rust/dbn/src/encode/json.rs index b3fab43..a533dcc 100644 --- a/rust/dbn/src/encode/json.rs +++ b/rust/dbn/src/encode/json.rs @@ -174,7 +174,7 @@ mod tests { } const HEADER_JSON: &str = - r#""hd":{"rtype":4,"publisher_id":1,"product_id":323,"ts_event":"1658441851000000000"}"#; + r#""hd":{"rtype":4,"publisher_id":1,"instrument_id":323,"ts_event":"1658441851000000000"}"#; const BID_ASK_JSON: &str = r#"{"bid_px":372000000000000,"ask_px":372500000000000,"bid_sz":10,"ask_sz":5,"bid_ct":5,"ask_ct":2}"#; #[test] @@ -380,7 +380,7 @@ mod tests { currency: [0; 4], settl_currency: str_to_c_chars("USD").unwrap(), secsubtype: [0; 6], - symbol: [0; 22], + raw_symbol: [0; 22], group: [0; 21], exchange: [0; 5], asset: [0; 7], @@ -424,7 +424,7 @@ mod tests { r#""min_price_increment_amount":5,"price_ratio":10,"inst_attrib_value":10,"underlying_id":256785,"cleared_volume":0,"market_depth_implied":0,"#, r#""market_depth":13,"market_segment_id":0,"max_trade_vol":10000,"min_lot_size":1,"min_lot_size_block":1000,"min_lot_size_round_lot":100,"min_trade_vol":1,"#, r#""open_interest_qty":0,"contract_multiplier":0,"decay_quantity":0,"original_contract_size":0,"trading_reference_date":0,"appl_id":0,"#, - 
r#""maturity_year":0,"decay_start_date":0,"channel_id":4,"currency":"","settl_currency":"USD","secsubtype":"","symbol":"","group":"","exchange":"","asset":"","cfi":"","#, + r#""maturity_year":0,"decay_start_date":0,"channel_id":4,"currency":"","settl_currency":"USD","secsubtype":"","raw_symbol":"","group":"","exchange":"","asset":"","cfi":"","#, r#""security_type":"","unit_of_measure":"","underlying":"","strike_price_currency":"","instrument_class":"F","strike_price":0,"match_algorithm":"F","md_security_trading_status":2,"main_fraction":4,"price_display_format":8,"#, r#""settl_price_type":9,"sub_fraction":23,"underlying_product":10,"security_update_action":"A","maturity_month":8,"maturity_day":9,"maturity_week":11,"#, r#""user_defined_instrument":"N","contract_multiplier_unit":0,"flow_schedule_type":5,"tick_rule":0"# @@ -518,14 +518,14 @@ mod tests { start: 1662734705128748281, end: NonZeroU64::new(1662734720914876944), limit: None, - stype_in: Some(SType::ProductId), - stype_out: SType::Native, + stype_in: Some(SType::InstrumentId), + stype_out: SType::RawSymbol, ts_out: false, symbols: vec!["ESZ2".to_owned()], partial: Vec::new(), not_found: Vec::new(), mappings: vec![SymbolMapping { - native_symbol: "ESZ2".to_owned(), + raw_symbol: "ESZ2".to_owned(), intervals: vec![MappingInterval { start_date: time::Date::from_calendar_date(2022, time::Month::September, 9) .unwrap(), @@ -540,8 +540,8 @@ mod tests { res, "{\"version\":1,\"dataset\":\"GLBX.MDP3\",\"schema\":\"ohlcv-1h\",\"start\"\ :1662734705128748281,\"end\":1662734720914876944,\"limit\":0,\ - \"stype_in\":\"product_id\",\"stype_out\":\"native\",\"ts_out\":false,\"symbols\"\ - :[\"ESZ2\"],\"partial\":[],\"not_found\":[],\"mappings\":[{\"native_symbol\":\"ESZ2\",\ + \"stype_in\":\"instrument_id\",\"stype_out\":\"raw_symbol\",\"ts_out\":false,\"symbols\"\ + :[\"ESZ2\"],\"partial\":[],\"not_found\":[],\"mappings\":[{\"raw_symbol\":\"ESZ2\",\ 
\"intervals\":[{\"start_date\":\"2022-09-09\",\"end_date\":\"2022-09-10\",\"symbol\":\ \"ESH2\"}]}]}\n" ); diff --git a/rust/dbn/src/encode/mod.rs b/rust/dbn/src/encode/mod.rs index d4e636e..4827596 100644 --- a/rust/dbn/src/encode/mod.rs +++ b/rust/dbn/src/encode/mod.rs @@ -354,7 +354,7 @@ mod test_data { length: 30, rtype: 4, publisher_id: 1, - product_id: 323, + instrument_id: 323, ts_event: 1658441851000000000, }; diff --git a/rust/dbn/src/enums.rs b/rust/dbn/src/enums.rs index bd3cab9..6f29a96 100644 --- a/rust/dbn/src/enums.rs +++ b/rust/dbn/src/enums.rs @@ -1,3 +1,5 @@ +#![allow(deprecated)] // TODO: remove with SType::Smart + //! Enums used in Databento APIs. use std::fmt::{self, Display, Formatter}; @@ -163,11 +165,19 @@ impl serde::Serialize for UserDefinedInstrument { #[repr(u8)] pub enum SType { /// Symbology using a unique numeric ID. - ProductId = 0, + InstrumentId = 0, /// Symbology using the original symbols provided by the publisher. - Native = 1, + RawSymbol = 1, /// A set of Databento-specific symbologies for referring to groups of symbols. + #[deprecated(since = "0.5.0", note = "Smart was split into Continuous and Parent.")] Smart = 2, + /// A Databento-specific symbology where one symbol may point to different + /// instruments at different points of time, e.g. to always refer to the front month + /// future. + Continuous = 3, + /// A Databento-specific symbology for referring to a group of symbols by one + /// "parent" symbol, e.g. ES.FUT to refer to all ES futures. 
+ Parent = 4, } impl std::str::FromStr for SType { @@ -175,9 +185,11 @@ impl std::str::FromStr for SType { fn from_str(s: &str) -> Result { match s { - "product_id" => Ok(SType::ProductId), - "native" => Ok(SType::Native), + "instrument_id" | "product_id" => Ok(SType::InstrumentId), + "raw_symbol" | "native" => Ok(SType::RawSymbol), "smart" => Ok(SType::Smart), + "continuous" => Ok(SType::Continuous), + "parent" => Ok(SType::Parent), _ => Err(ConversionError::TypeConversion( "Value doesn't match a valid symbol type", )), @@ -195,9 +207,12 @@ impl SType { /// Convert the symbology type to its `str` representation. pub const fn as_str(&self) -> &'static str { match self { - SType::Native => "native", + SType::InstrumentId => "instrument_id", + SType::RawSymbol => "raw_symbol", + #[allow(deprecated)] SType::Smart => "smart", - SType::ProductId => "product_id", + SType::Continuous => "continuous", + SType::Parent => "parent", } } } diff --git a/rust/dbn/src/metadata.rs b/rust/dbn/src/metadata.rs index 9173c5d..aeeef2b 100644 --- a/rust/dbn/src/metadata.rs +++ b/rust/dbn/src/metadata.rs @@ -61,7 +61,7 @@ pub struct Metadata { /// Symbols that did not resolve for _any_ day in the query time range. #[pyo3(get)] pub not_found: Vec, - /// Symbol mappings containing a native symbol and its mapping intervals. + /// Symbol mappings containing a raw symbol and its mapping intervals. pub mappings: Vec, } @@ -294,12 +294,12 @@ impl Default for MetadataBuilder { } } -/// A native symbol and its symbol mappings for different time ranges within the query range. +/// A raw symbol and its symbol mappings for different time ranges within the query range. #[derive(Debug, Clone, PartialEq, Eq, Serialize)] #[cfg_attr(feature = "python", derive(pyo3::FromPyObject))] pub struct SymbolMapping { - /// The native symbol. - pub native_symbol: String, + /// The symbol assigned by publisher. + pub raw_symbol: String, /// The mappings of `native` for different date ranges. 
pub intervals: Vec, } diff --git a/rust/dbn/src/python.rs b/rust/dbn/src/python.rs index 48e8e28..e0e744d 100644 --- a/rust/dbn/src/python.rs +++ b/rust/dbn/src/python.rs @@ -83,7 +83,7 @@ impl Metadata { fn get_mappings(&self) -> HashMap> { let mut res = HashMap::new(); for mapping in self.mappings.iter() { - res.insert(mapping.native_symbol.clone(), mapping.intervals.clone()); + res.insert(mapping.raw_symbol.clone(), mapping.intervals.clone()); } res } @@ -117,8 +117,8 @@ impl IntoPy for SymbolMapping { impl ToPyObject for SymbolMapping { fn to_object(&self, py: Python<'_>) -> PyObject { let dict = PyDict::new(py); - dict.set_item("native_symbol", &self.native_symbol) - .expect("set native_symbol"); + dict.set_item("raw_symbol", &self.raw_symbol) + .expect("set raw_symbol"); dict.set_item("intervals", &self.intervals) .expect("set intervals"); dict.into_py(py) @@ -266,7 +266,7 @@ impl MboMsg { #[new] fn py_new( publisher_id: u16, - product_id: u32, + instrument_id: u32, ts_event: u64, order_id: u64, price: i64, @@ -280,7 +280,7 @@ impl MboMsg { flags: Option, ) -> Self { Self { - hd: RecordHeader::new::(rtype::MBO, publisher_id, product_id, ts_event), + hd: RecordHeader::new::(rtype::MBO, publisher_id, instrument_id, ts_event), order_id, price, size, @@ -347,7 +347,7 @@ impl TradeMsg { #[new] fn py_new( publisher_id: u16, - product_id: u32, + instrument_id: u32, ts_event: u64, price: i64, size: u32, @@ -360,7 +360,7 @@ impl TradeMsg { flags: Option, ) -> Self { Self { - hd: RecordHeader::new::(rtype::MBP_0, publisher_id, product_id, ts_event), + hd: RecordHeader::new::(rtype::MBP_0, publisher_id, instrument_id, ts_event), price, size, action, @@ -400,7 +400,7 @@ impl Mbp1Msg { #[new] fn py_new( publisher_id: u16, - product_id: u32, + instrument_id: u32, ts_event: u64, price: i64, size: u32, @@ -414,7 +414,7 @@ impl Mbp1Msg { booklevel: Option, ) -> Self { Self { - hd: RecordHeader::new::(rtype::MBP_1, publisher_id, product_id, ts_event), + hd: 
RecordHeader::new::(rtype::MBP_1, publisher_id, instrument_id, ts_event), price, size, action, @@ -455,7 +455,7 @@ impl Mbp10Msg { #[new] fn py_new( publisher_id: u16, - product_id: u32, + instrument_id: u32, ts_event: u64, price: i64, size: u32, @@ -481,7 +481,7 @@ impl Mbp10Msg { Default::default() }; Ok(Self { - hd: RecordHeader::new::(rtype::MBP_10, publisher_id, product_id, ts_event), + hd: RecordHeader::new::(rtype::MBP_10, publisher_id, instrument_id, ts_event), price, size, action, @@ -523,7 +523,7 @@ impl OhlcvMsg { fn py_new( rtype: u8, publisher_id: u16, - product_id: u32, + instrument_id: u32, ts_event: u64, open: i64, high: i64, @@ -532,7 +532,7 @@ impl OhlcvMsg { volume: u64, ) -> Self { Self { - hd: RecordHeader::new::(rtype, publisher_id, product_id, ts_event), + hd: RecordHeader::new::(rtype, publisher_id, instrument_id, ts_event), open, high, low, @@ -568,7 +568,7 @@ impl StatusMsg { #[new] fn py_new( publisher_id: u16, - product_id: u32, + instrument_id: u32, ts_event: u64, ts_recv: u64, group: &str, @@ -577,7 +577,7 @@ impl StatusMsg { trading_event: u8, ) -> PyResult { Ok(Self { - hd: RecordHeader::new::(rtype::STATUS, publisher_id, product_id, ts_event), + hd: RecordHeader::new::(rtype::STATUS, publisher_id, instrument_id, ts_event), ts_recv, group: str_to_c_chars(group).map_err(to_val_err)?, trading_status, @@ -619,13 +619,13 @@ impl InstrumentDefMsg { #[new] fn py_new( publisher_id: u16, - product_id: u32, + instrument_id: u32, ts_event: u64, ts_recv: u64, min_price_increment: i64, display_factor: i64, min_lot_size_round_lot: i32, - symbol: &str, + raw_symbol: &str, group: &str, exchange: &str, instrument_class: c_char, @@ -687,7 +687,7 @@ impl InstrumentDefMsg { hd: RecordHeader::new::( rtype::INSTRUMENT_DEF, publisher_id, - product_id, + instrument_id, ts_event, ), ts_recv, @@ -727,7 +727,7 @@ impl InstrumentDefMsg { settl_currency: str_to_c_chars(settl_currency.unwrap_or_default()) .map_err(to_val_err)?, secsubtype: 
str_to_c_chars(secsubtype.unwrap_or_default()).map_err(to_val_err)?, - symbol: str_to_c_chars(symbol).map_err(to_val_err)?, + raw_symbol: str_to_c_chars(raw_symbol).map_err(to_val_err)?, group: str_to_c_chars(group).map_err(to_val_err)?, exchange: str_to_c_chars(exchange).map_err(to_val_err)?, asset: str_to_c_chars(asset.unwrap_or_default()).map_err(to_val_err)?, @@ -801,9 +801,9 @@ impl InstrumentDefMsg { } #[getter] - #[pyo3(name = "symbol")] - fn py_symbol(&self) -> PyResult<&str> { - self.symbol().map_err(to_val_err) + #[pyo3(name = "raw_symbol")] + fn py_raw_symbol(&self) -> PyResult<&str> { + self.raw_symbol().map_err(to_val_err) } #[getter] @@ -854,7 +854,7 @@ impl ImbalanceMsg { #[new] fn py_new( publisher_id: u16, - product_id: u32, + instrument_id: u32, ts_event: u64, ts_recv: u64, ref_price: i64, @@ -867,7 +867,7 @@ impl ImbalanceMsg { significant_imbalance: c_char, ) -> Self { Self { - hd: RecordHeader::new::(rtype::IMBALANCE, publisher_id, product_id, ts_event), + hd: RecordHeader::new::(rtype::IMBALANCE, publisher_id, instrument_id, ts_event), ts_recv, ref_price, auction_time: 0, @@ -919,7 +919,7 @@ impl StatMsg { #[new] fn py_new( publisher_id: u16, - product_id: u32, + instrument_id: u32, ts_event: u64, ts_recv: u64, ts_ref: u64, @@ -933,7 +933,7 @@ impl StatMsg { stat_flags: Option, ) -> Self { Self { - hd: RecordHeader::new::(rtype::STATISTICS, publisher_id, product_id, ts_event), + hd: RecordHeader::new::(rtype::STATISTICS, publisher_id, instrument_id, ts_event), ts_recv, ts_ref, price, @@ -1010,7 +1010,7 @@ impl SymbolMappingMsg { #[new] fn py_new( publisher_id: u16, - product_id: u32, + instrument_id: u32, ts_event: u64, stype_in_symbol: &str, stype_out_symbol: &str, @@ -1021,7 +1021,7 @@ impl SymbolMappingMsg { hd: RecordHeader::new::( rtype::SYMBOL_MAPPING, publisher_id, - product_id, + instrument_id, ts_event, ), stype_in_symbol: str_to_c_chars(stype_in_symbol).map_err(to_val_err)?, diff --git a/rust/dbn/src/record.rs 
b/rust/dbn/src/record.rs index eca0528..14cac8e 100644 --- a/rust/dbn/src/record.rs +++ b/rust/dbn/src/record.rs @@ -39,8 +39,8 @@ pub struct RecordHeader { pub rtype: u8, /// The publisher ID assigned by Databento. pub publisher_id: u16, - /// The numeric product ID assigned to the instrument. - pub product_id: u32, + /// The numeric ID assigned to the instrument. + pub instrument_id: u32, /// The matching-engine-received timestamp expressed as number of nanoseconds since /// the UNIX epoch. #[serde(serialize_with = "serialize_large_u64")] @@ -352,7 +352,7 @@ pub struct InstrumentDefMsg { /// A bitmap of instrument eligibility attributes. #[pyo3(get, set)] pub inst_attrib_value: i32, - /// The `product_id` of the first underlying instrument. + /// The `instrument_id` of the first underlying instrument. #[pyo3(get, set)] pub underlying_id: u32, /// The total cleared volume of the instrument traded during the prior trading session. @@ -424,9 +424,9 @@ pub struct InstrumentDefMsg { /// The strategy type of the spread. #[serde(serialize_with = "serialize_c_char_arr")] pub secsubtype: [c_char; 6], - /// The instrument name (symbol). + /// The instrument raw symbol assigned by the publisher. #[serde(serialize_with = "serialize_c_char_arr")] - pub symbol: [c_char; 22], + pub raw_symbol: [c_char; 22], /// The security group code of the instrument. #[serde(serialize_with = "serialize_c_char_arr")] pub group: [c_char; 21], @@ -742,14 +742,14 @@ impl RecordHeader { pub const fn new( rtype: u8, publisher_id: u16, - product_id: u32, + instrument_id: u32, ts_event: u64, ) -> Self { Self { length: (mem::size_of::() / Self::LENGTH_MULTIPLIER) as u8, rtype, publisher_id, - product_id, + instrument_id, ts_event, } } @@ -889,12 +889,12 @@ impl InstrumentDefMsg { c_chars_to_str(&self.secsubtype) } - /// Returns `symbol` as a `&str`. + /// Returns `raw_symbol` as a `&str`. /// /// # Errors - /// This function returns an error if `symbol` contains invalid UTF-8. 
- pub fn symbol(&self) -> Result<&str, Utf8Error> { - c_chars_to_str(&self.symbol) + /// This function returns an error if `raw_symbol` contains invalid UTF-8. + pub fn raw_symbol(&self) -> Result<&str, Utf8Error> { + c_chars_to_str(&self.raw_symbol) } /// Returns `exchange` as a `&str`. @@ -1518,7 +1518,7 @@ mod tests { length: 56, rtype: rtype::OHLCV_1S, publisher_id: 1, - product_id: 5482, + instrument_id: 5482, ts_event: 1609160400000000000, }, open: 372025000000000, @@ -1579,7 +1579,7 @@ mod tests { let json = serde_json::to_string(&error).unwrap(); assert_eq!( json, - r#"{"hd":{"rtype":21,"publisher_id":0,"product_id":0,"ts_event":"0"},"err":"\"A test"}"# + r#"{"hd":{"rtype":21,"publisher_id":0,"instrument_id":0,"ts_event":"0"},"err":"\"A test"}"# ); } } From ab65fbcbbe5d4c0f4149e2ded2eab9e9e62010eb Mon Sep 17 00:00:00 2001 From: Carter Green Date: Fri, 21 Apr 2023 10:39:35 -0500 Subject: [PATCH 18/22] ADD: Add sentinel value for unset stat quantity --- rust/dbn/src/lib.rs | 2 ++ rust/dbn/src/record.rs | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/rust/dbn/src/lib.rs b/rust/dbn/src/lib.rs index 3a128ba..4689ba5 100644 --- a/rust/dbn/src/lib.rs +++ b/rust/dbn/src/lib.rs @@ -36,3 +36,5 @@ const NULL_STYPE: u8 = u8::MAX; pub const UNDEF_PRICE: i64 = i64::MAX; /// The sentinel value for an unset or null order quantity. pub const UNDEF_ORDER_SIZE: u32 = u32::MAX; +/// The sentinel value for an unset or null stat quantity. +pub const UNDEF_STAT_QUANTITY: i32 = i32::MAX; diff --git a/rust/dbn/src/record.rs b/rust/dbn/src/record.rs index 14cac8e..fa2d235 100644 --- a/rust/dbn/src/record.rs +++ b/rust/dbn/src/record.rs @@ -602,9 +602,11 @@ pub struct StatMsg { #[serde(serialize_with = "serialize_large_u64")] pub ts_ref: u64, /// The value for price statistics expressed as a signed integer where every 1 unit - /// corresponds to 1e-9, i.e. 1/1,000,000,000 or 0.000000001. + /// corresponds to 1e-9, i.e. 1/1,000,000,000 or 0.000000001. 
Will be + /// [`crate::UNDEF_PRICE`] when unused. pub price: i64, - /// The value for non-price statistics. + /// The value for non-price statistics. Will be [`crate::UNDEF_STAT_QUANTITY`] when + /// unused. pub quantity: i32, /// The message sequence number assigned at the venue. pub sequence: u32, From c9469144eb0c21a1f75ff7ba39d1ec3c35d5c566 Mon Sep 17 00:00:00 2001 From: Carter Green Date: Thu, 20 Apr 2023 14:21:21 -0500 Subject: [PATCH 19/22] MOD: Improve Python DBN usability --- python/databento_dbn.pyi | 41 +++++ python/pyproject.toml | 3 + rust/dbn/src/python.rs | 378 +++++++++++++++++++++++++-------------- rust/dbn/src/record.rs | 28 +-- 4 files changed, 304 insertions(+), 146 deletions(-) diff --git a/python/databento_dbn.pyi b/python/databento_dbn.pyi index a90a63c..6177ad0 100644 --- a/python/databento_dbn.pyi +++ b/python/databento_dbn.pyi @@ -290,6 +290,47 @@ class Record(SupportsBytes): ------- int + """ + @property + def rtype(self) -> int: + """ + The record type. + + Returns + ------- + int + + """ + @property + def publisher_id(self) -> int: + """ + The publisher ID assigned by Databento. + + Returns + ------- + int + + """ + @property + def product_id(self) -> int: + """ + The numeric product ID assigned to the instrument. + + Returns + ------- + int + + """ + @property + def ts_event(self) -> int: + """ + The matching-engine-received timestamp expressed as number of + nanoseconds since the UNIX epoch. 
+ + Returns + ------- + int + """ class _MBOBase: diff --git a/python/pyproject.toml b/python/pyproject.toml index ce51ae8..6f4abb5 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -4,6 +4,9 @@ build-backend = "maturin" [project] name = "databento-dbn" +description = "Native Databento bindings based on dbn Rust crate" +readme = "README.md" +license = { file = "../LICENSE" } requires-python = ">=3.7" classifiers = [ "Programming Language :: Rust", diff --git a/rust/dbn/src/python.rs b/rust/dbn/src/python.rs index e0e744d..7583f3e 100644 --- a/rust/dbn/src/python.rs +++ b/rust/dbn/src/python.rs @@ -29,6 +29,34 @@ use crate::{ }; use crate::{MappingInterval, Metadata, SymbolMapping}; +macro_rules! impl_repr { + () => { + fn __repr__(&self) -> String { + format!("{self:?}") + } + }; +} + +macro_rules! impl_richcmp { + () => { + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + }; +} + +macro_rules! impl_bytes { + () => { + fn __bytes__(&self) -> &[u8] { + self.as_ref() + } + }; +} + #[pymethods] impl Metadata { #[new] @@ -62,17 +90,8 @@ impl Metadata { .build() } - fn __richcmp__(&self, other: &Metadata, op: CompareOp, py: Python<'_>) -> Py { - match op { - CompareOp::Eq => self.eq(other).into_py(py), - CompareOp::Ne => self.ne(other).into_py(py), - _ => py.NotImplemented(), - } - } - - fn __repr__(&self) -> String { - format!("{self:?}") - } + impl_richcmp! {} + impl_repr! {} /// Encodes Metadata back into DBN format. fn __bytes__(&self, py: Python<'_>) -> PyResult> { @@ -294,20 +313,28 @@ impl MboMsg { } } - fn __richcmp__(&self, other: &MboMsg, op: CompareOp, py: Python<'_>) -> Py { - match op { - CompareOp::Eq => self.eq(other).into_py(py), - CompareOp::Ne => self.ne(other).into_py(py), - _ => py.NotImplemented(), - } + impl_richcmp! {} + impl_repr! {} + impl_bytes! 
{} + + #[getter] + fn rtype(&self) -> u8 { + self.hd.rtype } - fn __repr__(&self) -> String { - format!("{self:?}") + #[getter] + fn publisher_id(&self) -> u16 { + self.hd.publisher_id } - fn __bytes__(&self) -> &[u8] { - self.as_ref() + #[getter] + fn product_id(&self) -> u32 { + self.hd.product_id + } + + #[getter] + fn ts_event(&self) -> u64 { + self.hd.ts_event } #[pyo3(name = "record_size")] @@ -373,20 +400,28 @@ impl TradeMsg { } } - fn __richcmp__(&self, other: &TradeMsg, op: CompareOp, py: Python<'_>) -> Py { - match op { - CompareOp::Eq => self.eq(other).into_py(py), - CompareOp::Ne => self.ne(other).into_py(py), - _ => py.NotImplemented(), - } + impl_richcmp! {} + impl_repr! {} + impl_bytes! {} + + #[getter] + fn rtype(&self) -> u8 { + self.hd.rtype } - fn __repr__(&self) -> String { - format!("{self:?}") + #[getter] + fn publisher_id(&self) -> u16 { + self.hd.publisher_id } - fn __bytes__(&self) -> &[u8] { - self.as_ref() + #[getter] + fn product_id(&self) -> u32 { + self.hd.product_id + } + + #[getter] + fn ts_event(&self) -> u64 { + self.hd.ts_event } #[pyo3(name = "record_size")] @@ -428,20 +463,28 @@ impl Mbp1Msg { } } - fn __richcmp__(&self, other: &Mbp1Msg, op: CompareOp, py: Python<'_>) -> Py { - match op { - CompareOp::Eq => self.eq(other).into_py(py), - CompareOp::Ne => self.ne(other).into_py(py), - _ => py.NotImplemented(), - } + impl_richcmp! {} + impl_repr! {} + impl_bytes! 
{} + + #[getter] + fn rtype(&self) -> u8 { + self.hd.rtype } - fn __repr__(&self) -> String { - format!("{self:?}") + #[getter] + fn publisher_id(&self) -> u16 { + self.hd.publisher_id } - fn __bytes__(&self) -> &[u8] { - self.as_ref() + #[getter] + fn product_id(&self) -> u32 { + self.hd.product_id + } + + #[getter] + fn ts_event(&self) -> u64 { + self.hd.ts_event } #[pyo3(name = "record_size")] @@ -495,20 +538,28 @@ impl Mbp10Msg { }) } - fn __richcmp__(&self, other: &Mbp10Msg, op: CompareOp, py: Python<'_>) -> Py { - match op { - CompareOp::Eq => self.eq(other).into_py(py), - CompareOp::Ne => self.ne(other).into_py(py), - _ => py.NotImplemented(), - } + impl_richcmp! {} + impl_repr! {} + impl_bytes! {} + + #[getter] + fn rtype(&self) -> u8 { + self.hd.rtype } - fn __repr__(&self) -> String { - format!("{self:?}") + #[getter] + fn publisher_id(&self) -> u16 { + self.hd.publisher_id } - fn __bytes__(&self) -> &[u8] { - self.as_ref() + #[getter] + fn product_id(&self) -> u32 { + self.hd.product_id + } + + #[getter] + fn ts_event(&self) -> u64 { + self.hd.ts_event } #[pyo3(name = "record_size")] @@ -541,20 +592,28 @@ impl OhlcvMsg { } } - fn __richcmp__(&self, other: &OhlcvMsg, op: CompareOp, py: Python<'_>) -> Py { - match op { - CompareOp::Eq => self.eq(other).into_py(py), - CompareOp::Ne => self.ne(other).into_py(py), - _ => py.NotImplemented(), - } + impl_richcmp! {} + impl_repr! {} + impl_bytes! 
{} + + #[getter] + fn rtype(&self) -> u8 { + self.hd.rtype } - fn __repr__(&self) -> String { - format!("{self:?}") + #[getter] + fn publisher_id(&self) -> u16 { + self.hd.publisher_id + } + + #[getter] + fn product_id(&self) -> u32 { + self.hd.product_id } - fn __bytes__(&self) -> &[u8] { - self.as_ref() + #[getter] + fn ts_event(&self) -> u64 { + self.hd.ts_event } #[pyo3(name = "record_size")] @@ -586,20 +645,28 @@ impl StatusMsg { }) } - fn __richcmp__(&self, other: &StatusMsg, op: CompareOp, py: Python<'_>) -> Py { - match op { - CompareOp::Eq => self.eq(other).into_py(py), - CompareOp::Ne => self.ne(other).into_py(py), - _ => py.NotImplemented(), - } + impl_richcmp! {} + impl_repr! {} + impl_bytes! {} + + #[getter] + fn rtype(&self) -> u8 { + self.hd.rtype } - fn __repr__(&self) -> String { - format!("{self:?}") + #[getter] + fn publisher_id(&self) -> u16 { + self.hd.publisher_id } - fn __bytes__(&self) -> &[u8] { - self.as_ref() + #[getter] + fn product_id(&self) -> u32 { + self.hd.product_id + } + + #[getter] + fn ts_event(&self) -> u64 { + self.hd.ts_event } #[pyo3(name = "record_size")] @@ -761,20 +828,28 @@ impl InstrumentDefMsg { }) } - fn __richcmp__(&self, other: &InstrumentDefMsg, op: CompareOp, py: Python<'_>) -> Py { - match op { - CompareOp::Eq => self.eq(other).into_py(py), - CompareOp::Ne => self.ne(other).into_py(py), - _ => py.NotImplemented(), - } + impl_richcmp! {} + impl_repr! {} + impl_bytes! 
{} + + #[getter] + fn rtype(&self) -> u8 { + self.hd.rtype } - fn __repr__(&self) -> String { - format!("{self:?}") + #[getter] + fn publisher_id(&self) -> u16 { + self.hd.publisher_id } - fn __bytes__(&self) -> &[u8] { - self.as_ref() + #[getter] + fn product_id(&self) -> u32 { + self.hd.product_id + } + + #[getter] + fn ts_event(&self) -> u64 { + self.hd.ts_event } #[pyo3(name = "record_size")] @@ -892,20 +967,28 @@ impl ImbalanceMsg { } } - fn __richcmp__(&self, other: &ImbalanceMsg, op: CompareOp, py: Python<'_>) -> Py { - match op { - CompareOp::Eq => self.eq(other).into_py(py), - CompareOp::Ne => self.ne(other).into_py(py), - _ => py.NotImplemented(), - } + impl_richcmp! {} + impl_repr! {} + impl_bytes! {} + + #[getter] + fn rtype(&self) -> u8 { + self.hd.rtype } - fn __repr__(&self) -> String { - format!("{self:?}") + #[getter] + fn publisher_id(&self) -> u16 { + self.hd.publisher_id } - fn __bytes__(&self) -> &[u8] { - self.as_ref() + #[getter] + fn product_id(&self) -> u32 { + self.hd.product_id + } + + #[getter] + fn ts_event(&self) -> u64 { + self.hd.ts_event } #[pyo3(name = "record_size")] @@ -948,20 +1031,28 @@ impl StatMsg { } } - fn __richcmp__(&self, other: &StatMsg, op: CompareOp, py: Python<'_>) -> Py { - match op { - CompareOp::Eq => self.eq(other).into_py(py), - CompareOp::Ne => self.ne(other).into_py(py), - _ => py.NotImplemented(), - } + impl_richcmp! {} + impl_repr! {} + impl_bytes! 
{} + + #[getter] + fn rtype(&self) -> u8 { + self.hd.rtype } - fn __repr__(&self) -> String { - format!("{self:?}") + #[getter] + fn publisher_id(&self) -> u16 { + self.hd.publisher_id + } + + #[getter] + fn product_id(&self) -> u32 { + self.hd.product_id } - fn __bytes__(&self) -> &[u8] { - self.as_ref() + #[getter] + fn ts_event(&self) -> u64 { + self.hd.ts_event } #[pyo3(name = "record_size")] @@ -977,20 +1068,28 @@ impl ErrorMsg { Ok(ErrorMsg::new(ts_event, err)) } - fn __richcmp__(&self, other: &ErrorMsg, op: CompareOp, py: Python<'_>) -> Py { - match op { - CompareOp::Eq => self.eq(other).into_py(py), - CompareOp::Ne => self.ne(other).into_py(py), - _ => py.NotImplemented(), - } + impl_richcmp! {} + impl_repr! {} + impl_bytes! {} + + #[getter] + fn rtype(&self) -> u8 { + self.hd.rtype } - fn __repr__(&self) -> String { - format!("{self:?}") + #[getter] + fn publisher_id(&self) -> u16 { + self.hd.publisher_id } - fn __bytes__(&self) -> &[u8] { - self.as_ref() + #[getter] + fn product_id(&self) -> u32 { + self.hd.product_id + } + + #[getter] + fn ts_event(&self) -> u64 { + self.hd.ts_event } #[pyo3(name = "record_size")] @@ -1032,20 +1131,28 @@ impl SymbolMappingMsg { }) } - fn __richcmp__(&self, other: &SymbolMappingMsg, op: CompareOp, py: Python<'_>) -> Py { - match op { - CompareOp::Eq => self.eq(other).into_py(py), - CompareOp::Ne => self.ne(other).into_py(py), - _ => py.NotImplemented(), - } + impl_richcmp! {} + impl_repr! {} + impl_bytes! 
{} + + #[getter] + fn rtype(&self) -> u8 { + self.hd.rtype } - fn __repr__(&self) -> String { - format!("{self:?}") + #[getter] + fn publisher_id(&self) -> u16 { + self.hd.publisher_id } - fn __bytes__(&self) -> &[u8] { - self.as_ref() + #[getter] + fn product_id(&self) -> u32 { + self.hd.product_id + } + + #[getter] + fn ts_event(&self) -> u64 { + self.hd.ts_event } #[pyo3(name = "record_size")] @@ -1073,20 +1180,28 @@ impl SystemMsg { SystemMsg::new(ts_event, msg).map_err(to_val_err) } - fn __richcmp__(&self, other: &SystemMsg, op: CompareOp, py: Python<'_>) -> Py { - match op { - CompareOp::Eq => self.eq(other).into_py(py), - CompareOp::Ne => self.ne(other).into_py(py), - _ => py.NotImplemented(), - } + impl_richcmp! {} + impl_repr! {} + impl_bytes! {} + + #[getter] + fn rtype(&self) -> u8 { + self.hd.rtype } - fn __repr__(&self) -> String { - format!("{self:?}") + #[getter] + fn publisher_id(&self) -> u16 { + self.hd.publisher_id + } + + #[getter] + fn product_id(&self) -> u32 { + self.hd.product_id } - fn __bytes__(&self) -> &[u8] { - self.as_ref() + #[getter] + fn ts_event(&self) -> u64 { + self.hd.ts_event } #[pyo3(name = "record_size")] @@ -1100,7 +1215,6 @@ impl SystemMsg { self.msg().map_err(to_val_err) } - #[getter] #[pyo3(name = "is_heartbeat")] fn py_is_heartbeat(&self) -> bool { self.is_heartbeat() diff --git a/rust/dbn/src/record.rs b/rust/dbn/src/record.rs index fa2d235..2b27ece 100644 --- a/rust/dbn/src/record.rs +++ b/rust/dbn/src/record.rs @@ -26,7 +26,7 @@ use crate::{ #[cfg_attr(feature = "trivial_copy", derive(Copy))] #[cfg_attr( feature = "python", - pyo3::pyclass(get_all, set_all, module = "databento_dbn") + pyo3::pyclass(get_all, set_all, dict, module = "databento_dbn") )] pub struct RecordHeader { /// The length of the record in 32-bit words. 
@@ -54,7 +54,7 @@ pub struct RecordHeader { #[cfg_attr(feature = "trivial_copy", derive(Copy))] #[cfg_attr( feature = "python", - pyo3::pyclass(get_all, set_all, module = "databento_dbn", name = "MBOMsg") + pyo3::pyclass(get_all, set_all, dict, module = "databento_dbn", name = "MBOMsg") )] pub struct MboMsg { /// The common header. @@ -95,7 +95,7 @@ pub struct MboMsg { #[cfg_attr(feature = "trivial_copy", derive(Copy))] #[cfg_attr( feature = "python", - pyo3::pyclass(get_all, set_all, module = "databento_dbn") + pyo3::pyclass(get_all, set_all, dict, module = "databento_dbn") )] pub struct BidAskPair { /// The bid price. @@ -119,7 +119,7 @@ pub struct BidAskPair { #[cfg_attr(feature = "trivial_copy", derive(Copy))] #[cfg_attr( feature = "python", - pyo3::pyclass(get_all, set_all, module = "databento_dbn") + pyo3::pyclass(get_all, set_all, dict, module = "databento_dbn", name = "TradeMsg") )] pub struct TradeMsg { /// The common header. @@ -158,7 +158,7 @@ pub struct TradeMsg { #[cfg_attr(feature = "trivial_copy", derive(Copy))] #[cfg_attr( feature = "python", - pyo3::pyclass(get_all, set_all, module = "databento_dbn", name = "MBP1Msg") + pyo3::pyclass(get_all, set_all, dict, module = "databento_dbn", name = "MBP1Msg") )] pub struct Mbp1Msg { /// The common header. @@ -199,7 +199,7 @@ pub struct Mbp1Msg { #[cfg_attr(feature = "trivial_copy", derive(Copy))] #[cfg_attr( feature = "python", - pyo3::pyclass(get_all, set_all, module = "databento_dbn", name = "MBP10Msg") + pyo3::pyclass(get_all, set_all, dict, module = "databento_dbn", name = "MBP10Msg") )] pub struct Mbp10Msg { /// The common header. @@ -246,7 +246,7 @@ pub type TbboMsg = Mbp1Msg; #[cfg_attr(feature = "trivial_copy", derive(Copy))] #[cfg_attr( feature = "python", - pyo3::pyclass(get_all, set_all, module = "databento_dbn", name = "OHLCVMsg") + pyo3::pyclass(get_all, set_all, dict, module = "databento_dbn", name = "OHLCVMsg") )] pub struct OhlcvMsg { /// The common header. 
@@ -269,7 +269,7 @@ pub struct OhlcvMsg { #[repr(C)] #[derive(Clone, Debug, PartialEq, Eq, Serialize)] #[cfg_attr(feature = "trivial_copy", derive(Copy))] -#[cfg_attr(feature = "python", pyo3::pyclass(module = "databento_dbn"))] +#[cfg_attr(feature = "python", pyo3::pyclass(dict, module = "databento_dbn"))] #[cfg_attr(not(feature = "python"), derive(MockPyo3))] // bring `pyo3` attribute into scope pub struct StatusMsg { /// The common header. @@ -295,7 +295,7 @@ pub struct StatusMsg { #[repr(C)] #[derive(Clone, Debug, PartialEq, Eq, Serialize)] #[cfg_attr(feature = "trivial_copy", derive(Copy))] -#[cfg_attr(feature = "python", pyo3::pyclass(module = "databento_dbn"))] +#[cfg_attr(feature = "python", pyo3::pyclass(dict, module = "databento_dbn"))] #[cfg_attr(not(feature = "python"), derive(MockPyo3))] // bring `pyo3` attribute into scope pub struct InstrumentDefMsg { /// The common header. @@ -523,7 +523,7 @@ pub struct InstrumentDefMsg { #[cfg_attr(feature = "trivial_copy", derive(Copy))] #[cfg_attr( feature = "python", - pyo3::pyclass(get_all, set_all, module = "databento_dbn") + pyo3::pyclass(get_all, set_all, dict, module = "databento_dbn") )] pub struct ImbalanceMsg { /// The common header. @@ -589,7 +589,7 @@ pub struct ImbalanceMsg { #[cfg_attr(feature = "trivial_copy", derive(Copy))] #[cfg_attr( feature = "python", - pyo3::pyclass(get_all, set_all, module = "databento_dbn") + pyo3::pyclass(get_all, set_all, dict, module = "databento_dbn") )] pub struct StatMsg { /// The common header. @@ -632,7 +632,7 @@ pub struct StatMsg { #[repr(C)] #[derive(Clone, Debug, PartialEq, Eq, Serialize)] #[cfg_attr(feature = "trivial_copy", derive(Copy))] -#[cfg_attr(feature = "python", pyo3::pyclass(module = "databento_dbn"))] +#[cfg_attr(feature = "python", pyo3::pyclass(dict, module = "databento_dbn"))] #[cfg_attr(not(feature = "python"), derive(MockPyo3))] // bring `pyo3` attribute into scope pub struct ErrorMsg { /// The common header. 
@@ -648,7 +648,7 @@ pub struct ErrorMsg { #[repr(C)] #[derive(Clone, Debug, PartialEq, Eq, Serialize)] #[cfg_attr(feature = "trivial_copy", derive(Copy))] -#[cfg_attr(feature = "python", pyo3::pyclass(module = "databento_dbn"))] +#[cfg_attr(feature = "python", pyo3::pyclass(dict, module = "databento_dbn"))] #[cfg_attr(not(feature = "python"), derive(MockPyo3))] // bring `pyo3` attribute into scope pub struct SymbolMappingMsg { /// The common header. @@ -679,7 +679,7 @@ pub struct SymbolMappingMsg { #[repr(C)] #[derive(Clone, Debug, PartialEq, Eq, Serialize)] #[cfg_attr(feature = "trivial_copy", derive(Copy))] -#[cfg_attr(feature = "python", pyo3::pyclass(module = "databento_dbn"))] +#[cfg_attr(feature = "python", pyo3::pyclass(dict, module = "databento_dbn"))] #[cfg_attr(not(feature = "python"), derive(MockPyo3))] // bring `pyo3` attribute into scope pub struct SystemMsg { /// The common header. From 65b595e8f9fdf594164cf9a75ff1c67898d0a32e Mon Sep 17 00:00:00 2001 From: Carter Green Date: Fri, 21 Apr 2023 16:35:20 -0500 Subject: [PATCH 20/22] FIX: Fix DBN build --- python/databento_dbn.pyi | 6 ++--- rust/dbn/src/enums.rs | 2 +- rust/dbn/src/python.rs | 48 ++++++++++++++++++++-------------------- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/python/databento_dbn.pyi b/python/databento_dbn.pyi index 6177ad0..2649a5a 100644 --- a/python/databento_dbn.pyi +++ b/python/databento_dbn.pyi @@ -247,7 +247,7 @@ class RecordHeader: @property def instrument_id(self) -> int: """ - The numeric product ID assigned to the instrument. + The numeric ID assigned to the instrument. Returns ------- @@ -312,9 +312,9 @@ class Record(SupportsBytes): """ @property - def product_id(self) -> int: + def instrument_id(self) -> int: """ - The numeric product ID assigned to the instrument. + The numeric ID assigned to the instrument. 
Returns ------- diff --git a/rust/dbn/src/enums.rs b/rust/dbn/src/enums.rs index 6f29a96..e0ffe33 100644 --- a/rust/dbn/src/enums.rs +++ b/rust/dbn/src/enums.rs @@ -557,7 +557,7 @@ impl Display for Compression { /// Constants for the bit flag record fields. pub mod flags { /// Indicates it's the last message in the packet from the venue for a given - /// `product_id`. + /// `instrument_id`. pub const LAST: u8 = 1 << 7; /// Indicates the message was sourced from a replay, such as a snapshot server. pub const SNAPSHOT: u8 = 1 << 5; diff --git a/rust/dbn/src/python.rs b/rust/dbn/src/python.rs index 7583f3e..c941082 100644 --- a/rust/dbn/src/python.rs +++ b/rust/dbn/src/python.rs @@ -328,8 +328,8 @@ impl MboMsg { } #[getter] - fn product_id(&self) -> u32 { - self.hd.product_id + fn instrument_id(&self) -> u32 { + self.hd.instrument_id } #[getter] @@ -415,8 +415,8 @@ impl TradeMsg { } #[getter] - fn product_id(&self) -> u32 { - self.hd.product_id + fn instrument_id(&self) -> u32 { + self.hd.instrument_id } #[getter] @@ -478,8 +478,8 @@ impl Mbp1Msg { } #[getter] - fn product_id(&self) -> u32 { - self.hd.product_id + fn instrument_id(&self) -> u32 { + self.hd.instrument_id } #[getter] @@ -553,8 +553,8 @@ impl Mbp10Msg { } #[getter] - fn product_id(&self) -> u32 { - self.hd.product_id + fn instrument_id(&self) -> u32 { + self.hd.instrument_id } #[getter] @@ -607,8 +607,8 @@ impl OhlcvMsg { } #[getter] - fn product_id(&self) -> u32 { - self.hd.product_id + fn instrument_id(&self) -> u32 { + self.hd.instrument_id } #[getter] @@ -660,8 +660,8 @@ impl StatusMsg { } #[getter] - fn product_id(&self) -> u32 { - self.hd.product_id + fn instrument_id(&self) -> u32 { + self.hd.instrument_id } #[getter] @@ -843,8 +843,8 @@ impl InstrumentDefMsg { } #[getter] - fn product_id(&self) -> u32 { - self.hd.product_id + fn instrument_id(&self) -> u32 { + self.hd.instrument_id } #[getter] @@ -982,8 +982,8 @@ impl ImbalanceMsg { } #[getter] - fn product_id(&self) -> u32 { - self.hd.product_id 
+ fn instrument_id(&self) -> u32 { + self.hd.instrument_id } #[getter] @@ -1046,8 +1046,8 @@ impl StatMsg { } #[getter] - fn product_id(&self) -> u32 { - self.hd.product_id + fn instrument_id(&self) -> u32 { + self.hd.instrument_id } #[getter] @@ -1083,8 +1083,8 @@ impl ErrorMsg { } #[getter] - fn product_id(&self) -> u32 { - self.hd.product_id + fn instrument_id(&self) -> u32 { + self.hd.instrument_id } #[getter] @@ -1146,8 +1146,8 @@ impl SymbolMappingMsg { } #[getter] - fn product_id(&self) -> u32 { - self.hd.product_id + fn instrument_id(&self) -> u32 { + self.hd.instrument_id } #[getter] @@ -1195,8 +1195,8 @@ impl SystemMsg { } #[getter] - fn product_id(&self) -> u32 { - self.hd.product_id + fn instrument_id(&self) -> u32 { + self.hd.instrument_id } #[getter] From 501f2bc56c052e40d4df1caf07e629a68dbae559 Mon Sep 17 00:00:00 2001 From: Nick Macholl Date: Mon, 24 Apr 2023 15:15:46 -0700 Subject: [PATCH 21/22] FIX: Revert bytes, repr, and richcmp macros --- rust/dbn/src/python.rs | 256 ++++++++++++++++++++++++++++++----------- 1 file changed, 190 insertions(+), 66 deletions(-) diff --git a/rust/dbn/src/python.rs b/rust/dbn/src/python.rs index c941082..a8ffad1 100644 --- a/rust/dbn/src/python.rs +++ b/rust/dbn/src/python.rs @@ -29,34 +29,6 @@ use crate::{ }; use crate::{MappingInterval, Metadata, SymbolMapping}; -macro_rules! impl_repr { - () => { - fn __repr__(&self) -> String { - format!("{self:?}") - } - }; -} - -macro_rules! impl_richcmp { - () => { - fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> Py { - match op { - CompareOp::Eq => self.eq(other).into_py(py), - CompareOp::Ne => self.ne(other).into_py(py), - _ => py.NotImplemented(), - } - } - }; -} - -macro_rules! impl_bytes { - () => { - fn __bytes__(&self) -> &[u8] { - self.as_ref() - } - }; -} - #[pymethods] impl Metadata { #[new] @@ -90,8 +62,17 @@ impl Metadata { .build() } - impl_richcmp! {} - impl_repr! 
{} + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + + fn __repr__(&self) -> String { + format!("{self:?}") + } /// Encodes Metadata back into DBN format. fn __bytes__(&self, py: Python<'_>) -> PyResult> { @@ -313,9 +294,21 @@ impl MboMsg { } } - impl_richcmp! {} - impl_repr! {} - impl_bytes! {} + fn __bytes__(&self) -> &[u8] { + self.as_ref() + } + + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + + fn __repr__(&self) -> String { + format!("{self:?}") + } #[getter] fn rtype(&self) -> u8 { @@ -400,9 +393,21 @@ impl TradeMsg { } } - impl_richcmp! {} - impl_repr! {} - impl_bytes! {} + fn __bytes__(&self) -> &[u8] { + self.as_ref() + } + + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + + fn __repr__(&self) -> String { + format!("{self:?}") + } #[getter] fn rtype(&self) -> u8 { @@ -463,9 +468,21 @@ impl Mbp1Msg { } } - impl_richcmp! {} - impl_repr! {} - impl_bytes! {} + fn __bytes__(&self) -> &[u8] { + self.as_ref() + } + + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + + fn __repr__(&self) -> String { + format!("{self:?}") + } #[getter] fn rtype(&self) -> u8 { @@ -538,9 +555,21 @@ impl Mbp10Msg { }) } - impl_richcmp! {} - impl_repr! {} - impl_bytes! 
{} + fn __bytes__(&self) -> &[u8] { + self.as_ref() + } + + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + + fn __repr__(&self) -> String { + format!("{self:?}") + } #[getter] fn rtype(&self) -> u8 { @@ -592,9 +621,21 @@ impl OhlcvMsg { } } - impl_richcmp! {} - impl_repr! {} - impl_bytes! {} + fn __bytes__(&self) -> &[u8] { + self.as_ref() + } + + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + + fn __repr__(&self) -> String { + format!("{self:?}") + } #[getter] fn rtype(&self) -> u8 { @@ -645,9 +686,21 @@ impl StatusMsg { }) } - impl_richcmp! {} - impl_repr! {} - impl_bytes! {} + fn __bytes__(&self) -> &[u8] { + self.as_ref() + } + + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + + fn __repr__(&self) -> String { + format!("{self:?}") + } #[getter] fn rtype(&self) -> u8 { @@ -828,9 +881,21 @@ impl InstrumentDefMsg { }) } - impl_richcmp! {} - impl_repr! {} - impl_bytes! {} + fn __bytes__(&self) -> &[u8] { + self.as_ref() + } + + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + + fn __repr__(&self) -> String { + format!("{self:?}") + } #[getter] fn rtype(&self) -> u8 { @@ -967,9 +1032,21 @@ impl ImbalanceMsg { } } - impl_richcmp! {} - impl_repr! {} - impl_bytes! 
{} + fn __bytes__(&self) -> &[u8] { + self.as_ref() + } + + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + + fn __repr__(&self) -> String { + format!("{self:?}") + } #[getter] fn rtype(&self) -> u8 { @@ -1031,10 +1108,21 @@ impl StatMsg { } } - impl_richcmp! {} - impl_repr! {} - impl_bytes! {} + fn __bytes__(&self) -> &[u8] { + self.as_ref() + } + + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + fn __repr__(&self) -> String { + format!("{self:?}") + } #[getter] fn rtype(&self) -> u8 { self.hd.rtype @@ -1068,9 +1156,21 @@ impl ErrorMsg { Ok(ErrorMsg::new(ts_event, err)) } - impl_richcmp! {} - impl_repr! {} - impl_bytes! {} + fn __bytes__(&self) -> &[u8] { + self.as_ref() + } + + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + + fn __repr__(&self) -> String { + format!("{self:?}") + } #[getter] fn rtype(&self) -> u8 { @@ -1131,9 +1231,21 @@ impl SymbolMappingMsg { }) } - impl_richcmp! {} - impl_repr! {} - impl_bytes! {} + fn __bytes__(&self) -> &[u8] { + self.as_ref() + } + + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + + fn __repr__(&self) -> String { + format!("{self:?}") + } #[getter] fn rtype(&self) -> u8 { @@ -1180,9 +1292,21 @@ impl SystemMsg { SystemMsg::new(ts_event, msg).map_err(to_val_err) } - impl_richcmp! {} - impl_repr! {} - impl_bytes! 
{} + fn __bytes__(&self) -> &[u8] { + self.as_ref() + } + + fn __richcmp__(&self, other: &Self, op: CompareOp, py: Python<'_>) -> Py { + match op { + CompareOp::Eq => self.eq(other).into_py(py), + CompareOp::Ne => self.ne(other).into_py(py), + _ => py.NotImplemented(), + } + } + + fn __repr__(&self) -> String { + format!("{self:?}") + } #[getter] fn rtype(&self) -> u8 { From 9144f63e1844a53af0f37aaffffe1ee57b0327c9 Mon Sep 17 00:00:00 2001 From: Carter Green Date: Tue, 25 Apr 2023 09:15:09 -0500 Subject: [PATCH 22/22] VER: Release DBN 0.5.0 --- CHANGELOG.md | 2 +- Cargo.lock | 14 +++++++------- c/Cargo.toml | 4 ++-- python/Cargo.toml | 2 +- rust/dbn-cli/Cargo.toml | 4 ++-- rust/dbn-macros/Cargo.toml | 2 +- rust/dbn/Cargo.toml | 4 ++-- rust/dbn/src/enums.rs | 6 +++--- 8 files changed, 19 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 99bf208..9b7d433 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## 0.5.0 - TBD +## 0.5.0 - 2023-04-25 - Added support for Statistics schema - Changed `schema` and `stype_in` to optional in `Metadata` to support live data - Renamed `SType::ProductId` to `SType::InstrumentId` and `SType::Native` to `SType::RawSymbol` diff --git a/Cargo.lock b/Cargo.lock index 8031388..812559d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -229,7 +229,7 @@ dependencies = [ [[package]] name = "databento-dbn" -version = "0.4.3" +version = "0.5.0" dependencies = [ "dbn", "pyo3", @@ -238,7 +238,7 @@ dependencies = [ [[package]] name = "dbn" -version = "0.4.3" +version = "0.5.0" dependencies = [ "anyhow", "async-compression", @@ -256,7 +256,7 @@ dependencies = [ [[package]] name = "dbn-c" -version = "0.4.3" +version = "0.5.0" dependencies = [ "cbindgen", "dbn", @@ -265,7 +265,7 @@ dependencies = [ [[package]] name = "dbn-cli" -version = "0.4.3" +version = "0.5.0" dependencies = [ "anyhow", "assert_cmd", @@ -278,7 +278,7 @@ dependencies = [ [[package]] name = "dbn-macros" -version = "0.4.3" +version = 
"0.5.0" [[package]] name = "difflib" @@ -444,9 +444,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.141" +version = "0.2.142" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3304a64d199bb964be99741b7a14d26972741915b3649639149b2479bb46f4b5" +checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" [[package]] name = "linux-raw-sys" diff --git a/c/Cargo.toml b/c/Cargo.toml index 4c2ce34..39145b1 100644 --- a/c/Cargo.toml +++ b/c/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "dbn-c" authors = ["Databento "] -version = "0.4.3" +version = "0.5.0" edition = "2021" description = "C bindings for working with Databento Binary Encoding (DBN)" license = "Apache-2.0" @@ -16,7 +16,7 @@ crate-type = ["cdylib"] [dependencies] # DBN library dbn = { path = "../rust/dbn", features = [] } -libc = "0.2.141" +libc = "0.2.142" [build-dependencies] cbindgen = { version = "0.24.3", default_features = false } diff --git a/python/Cargo.toml b/python/Cargo.toml index 6cb6241..bd4e6cd 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "databento-dbn" authors = ["Databento "] -version = "0.4.3" +version = "0.5.0" edition = "2021" description = "Python library written in Rust for working with Databento Binary Encoding (DBN)" license = "Apache-2.0" diff --git a/rust/dbn-cli/Cargo.toml b/rust/dbn-cli/Cargo.toml index 72a55b1..f86f81b 100644 --- a/rust/dbn-cli/Cargo.toml +++ b/rust/dbn-cli/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "dbn-cli" authors = ["Databento "] -version = "0.4.3" +version = "0.5.0" edition = "2021" description = "Command-line utility for converting Databento Binary Encoding (DBN) files to text-based formats" default-run = "dbn" @@ -17,7 +17,7 @@ path = "src/main.rs" [dependencies] # Databento common DBN library -dbn = { path = "../dbn", version = "=0.4.3" } +dbn = { path = "../dbn", version = "=0.5.0" } # Error handling anyhow = "1.0.70" diff --git 
a/rust/dbn-macros/Cargo.toml b/rust/dbn-macros/Cargo.toml index ba3985c..89e0f83 100644 --- a/rust/dbn-macros/Cargo.toml +++ b/rust/dbn-macros/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "dbn-macros" authors = ["Databento "] -version = "0.4.3" +version = "0.5.0" edition = "2021" description = "Proc macros for dbn crate" license = "Apache-2.0" diff --git a/rust/dbn/Cargo.toml b/rust/dbn/Cargo.toml index dfa43c5..1e5eb8b 100644 --- a/rust/dbn/Cargo.toml +++ b/rust/dbn/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "dbn" authors = ["Databento "] -version = "0.4.3" +version = "0.5.0" edition = "2021" description = "Library for working with Databento Binary Encoding (DBN)" license = "Apache-2.0" @@ -18,7 +18,7 @@ python = ["pyo3"] trivial_copy = [] [dependencies] -dbn-macros = { version = "=0.4.3", path = "../dbn-macros" } +dbn-macros = { version = "=0.5.0", path = "../dbn-macros" } # error handling anyhow = "1.0" diff --git a/rust/dbn/src/enums.rs b/rust/dbn/src/enums.rs index e0ffe33..cf97772 100644 --- a/rust/dbn/src/enums.rs +++ b/rust/dbn/src/enums.rs @@ -591,7 +591,7 @@ impl Serialize for SecurityUpdateAction { } } -/// The type of [`StatMsg`](crate::record::StatMsg) update. +/// The type of statistic contained in a [`StatMsg`](crate::record::StatMsg). #[repr(u16)] #[derive(Clone, Copy, Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] pub enum StatType { @@ -630,8 +630,8 @@ pub enum StatType { #[repr(u8)] #[derive(Clone, Copy, Debug, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)] pub enum StatUpdateAction { - /// + /// A new statistic. New = 1, - /// + /// A removal of a statistic. Delete = 2, }