diff --git a/object/src/lib.rs b/object/src/lib.rs index 97681da5c..ebb598315 100644 --- a/object/src/lib.rs +++ b/object/src/lib.rs @@ -427,7 +427,7 @@ where let cs = SpecificCharacterSet::Default; let mut dset_writer = DataSetWriter::with_ts_cs(to, ts, cs).context(CreatePrinterSnafu)?; - // write object + // We use the default options, because only the inner object knows if something needs to change dset_writer .write_sequence((&self.obj).into_tokens()) .context(PrintDataSetSnafu)?; @@ -460,7 +460,7 @@ where let cs = SpecificCharacterSet::Default; let mut dset_writer = DataSetWriter::with_ts_cs(to, ts, cs).context(CreatePrinterSnafu)?; - // write object + // We use the default options, because only the inner object knows if something needs to change dset_writer .write_sequence((&self.obj).into_tokens()) .context(PrintDataSetSnafu)?; diff --git a/object/src/mem.rs b/object/src/mem.rs index 61c9a93c0..1b9135b8a 100644 --- a/object/src/mem.rs +++ b/object/src/mem.rs @@ -69,7 +69,7 @@ use dicom_core::{DataElement, Length, PrimitiveValue, Tag, VR}; use dicom_dictionary_std::{tags, StandardDataDictionary}; use dicom_encoding::transfer_syntax::TransferSyntaxIndex; use dicom_encoding::{encode::EncodeTo, text::SpecificCharacterSet, TransferSyntax}; -use dicom_parser::dataset::{DataSetReader, DataToken}; +use dicom_parser::dataset::{DataSetReader, DataToken, IntoTokensOptions}; use dicom_parser::{ dataset::{read::Error as ParserError, DataSetWriter, IntoTokens}, StatefulDecode, @@ -100,6 +100,10 @@ pub struct InMemDicomObject { /// It is usually undefined, unless it is part of an item /// in a sequence with a specified length in its item header. len: Length, + /// In case the SpecificCharSet changes we need to mark the object as dirty, + /// because changing the character set may change the length in bytes of + /// stored text. 
It has to be public for now because we need + pub(crate) charset_changed: bool, } impl PartialEq for InMemDicomObject { @@ -168,6 +172,7 @@ impl InMemDicomObject { entries: BTreeMap::new(), dict: StandardDataDictionary, len: Length::UNDEFINED, + charset_changed: false, } } @@ -269,6 +274,7 @@ where entries: BTreeMap::new(), dict, len: Length::UNDEFINED, + charset_changed: false, }, } } @@ -481,6 +487,7 @@ impl FileDicomObject> { entries: BTreeMap::new(), dict: StandardDataDictionary, len: Length::UNDEFINED, + charset_changed: false, }, } } @@ -497,6 +504,7 @@ where entries: BTreeMap::new(), dict, len: Length::UNDEFINED, + charset_changed: false, } } @@ -510,6 +518,7 @@ where entries: entries?, dict, len: Length::UNDEFINED, + charset_changed: false, }) } @@ -523,6 +532,7 @@ where entries, dict, len: Length::UNDEFINED, + charset_changed: false, } } @@ -559,6 +569,7 @@ where entries, dict, len: Length::UNDEFINED, + charset_changed: false, } } @@ -689,14 +700,19 @@ where /// Insert a data element to the object, replacing (and returning) any /// previous element of the same attribute. + /// This might invalidate all sequence and item lengths if the charset of the + /// element changes. pub fn put(&mut self, elt: InMemElement) -> Option> { self.put_element(elt) } /// Insert a data element to the object, replacing (and returning) any /// previous element of the same attribute. + /// This might invalidate all sequence and item lengths if the charset of the + /// element changes. 
pub fn put_element(&mut self, elt: InMemElement) -> Option> { self.len = Length::UNDEFINED; + self.invalidate_if_charset_changed(elt.tag()); self.entries.insert(elt.tag(), elt) } @@ -815,6 +831,7 @@ where tag: Tag, f: impl FnMut(&mut Value, InMemFragment>), ) -> bool { + self.invalidate_if_charset_changed(tag); if let Some(e) = self.entries.get_mut(&tag) { e.update_value(f); self.len = Length::UNDEFINED; @@ -865,7 +882,7 @@ where MissingLeafElementSnafu { selector: selector.clone(), } - }) + }); } // navigate further down AttributeSelectorStep::Nested { tag, item } => { @@ -898,6 +915,15 @@ where unreachable!() } + /// Change the 'specific_character_set' tag to ISO_IR 192, marking the dataset as UTF-8 + pub fn convert_to_utf8(&mut self) { + self.put(DataElement::new( + tags::SPECIFIC_CHARACTER_SET, + VR::CS, + "ISO_IR 192", + )); + } + /// Apply the given attribute operation on this object. /// /// See the [`dicom_core::ops`] module @@ -979,6 +1005,7 @@ where } fn apply_leaf(&mut self, tag: Tag, action: AttributeAction) -> ApplyResult { + self.invalidate_if_charset_changed(tag); match action { AttributeAction::Remove => { self.remove_element(tag); @@ -1054,6 +1081,8 @@ where } fn apply_change_value_impl(&mut self, tag: Tag, new_value: PrimitiveValue) { + self.invalidate_if_charset_changed(tag); + if let Some(e) = self.entries.get_mut(&tag) { let vr = e.vr(); // handle edge case: if VR is SQ and suggested value is empty, @@ -1085,11 +1114,18 @@ where } } + fn invalidate_if_charset_changed(&mut self, tag: Tag) { + if tag == tags::SPECIFIC_CHARACTER_SET { + self.charset_changed = true; + } + } + fn apply_push_str_impl(&mut self, tag: Tag, string: Cow<'static, str>) -> ApplyResult { if let Some(e) = self.entries.remove(&tag) { let (header, value) = e.into_parts(); match value { Value::Primitive(mut v) => { + self.invalidate_if_charset_changed(tag); // extend value v.extend_str([string]).context(ModifySnafu)?; // reinsert element @@ -1336,10 +1372,10 @@ where { // 
prepare data set writer let mut dset_writer = DataSetWriter::new(to, encoder); - + let required_options = IntoTokensOptions::new(self.charset_changed); // write object dset_writer - .write_sequence(self.into_tokens()) + .write_sequence(self.into_tokens_with_options(required_options)) .context(PrintDataSetSnafu)?; Ok(()) @@ -1362,10 +1398,11 @@ where { // prepare data set writer let mut dset_writer = DataSetWriter::with_ts_cs(to, ts, cs).context(CreatePrinterSnafu)?; + let required_options = IntoTokensOptions::new(self.charset_changed); // write object dset_writer - .write_sequence(self.into_tokens()) + .write_sequence(self.into_tokens_with_options(required_options)) .context(PrintDataSetSnafu)?; Ok(()) @@ -1520,14 +1557,24 @@ where } DataToken::ItemEnd if in_item => { // end of item, leave now - return Ok(InMemDicomObject { entries, dict, len }); + return Ok(InMemDicomObject { + entries, + dict, + len, + charset_changed: false, + }); } token => return UnexpectedTokenSnafu { token }.fail(), }; entries.insert(elem.tag(), elem); } - Ok(InMemDicomObject { entries, dict, len }) + Ok(InMemDicomObject { + entries, + dict, + len, + charset_changed: false, + }) } /// Build an encapsulated pixel data by collecting all fragments into an @@ -1699,7 +1746,6 @@ fn even_len(l: u32) -> u32 { #[cfg(test)] mod tests { - use super::*; use crate::open_file; use byteordered::Endianness; @@ -2657,7 +2703,7 @@ mod tests { Some(&DataElement::new( tags::INSTITUTION_NAME, VR::LO, - PrimitiveValue::from("Test Hospital") + PrimitiveValue::from("Test Hospital"), )) ); @@ -2674,7 +2720,7 @@ mod tests { Some(&DataElement::new( tags::INSTITUTION_NAME, VR::LO, - PrimitiveValue::from("REMOVED") + PrimitiveValue::from("REMOVED"), )) ); @@ -2702,7 +2748,7 @@ mod tests { Some(&DataElement::new( tags::REQUESTING_PHYSICIAN, VR::PN, - PrimitiveValue::from("Doctor^Anonymous") + PrimitiveValue::from("Doctor^Anonymous"), )) ); } @@ -2721,7 +2767,7 @@ mod tests { Some(&DataElement::new( 
tags::REQUESTING_PHYSICIAN, VR::PN, - PrimitiveValue::from("Doctor^Anonymous") + PrimitiveValue::from("Doctor^Anonymous"), )) ); } @@ -2985,6 +3031,7 @@ mod tests { entries, dict: StandardDataDictionary, len: Length(1), + charset_changed: false, }; assert!(obj.length().is_defined()); @@ -3277,4 +3324,131 @@ mod tests { .length() .is_undefined()); } + + #[test] + fn deep_sequence_change_encoding_writes_undefined_sequence_length() { + use smallvec::smallvec; + + let obj_1 = InMemDicomObject::from_element_iter(vec![ + //The length of this string is 20 bytes in ISO_IR 100 but should be 22 bytes in ISO_IR 192 (UTF-8) + DataElement::new( + tags::STUDY_DESCRIPTION, + VR::SL, + Value::Primitive("MORFOLOGÍA Y FUNCIÓN".into()), + ), + //ISO_IR 100 and ISO_IR 192 length are the same + DataElement::new( + tags::SERIES_DESCRIPTION, + VR::SL, + Value::Primitive("0123456789".into()), + ), + ]); + + let some_tag = Tag(0x0018, 0x6011); + + let inner_sequence = InMemDicomObject::from_element_iter(vec![DataElement::new( + some_tag, + VR::SQ, + Value::from(DataSetSequence::new( + smallvec![obj_1], + Length(30), //20 bytes from study, 10 from series + )), + )]); + let outer_sequence = DataElement::new( + some_tag, + VR::SQ, + Value::from(DataSetSequence::new( + smallvec![inner_sequence.clone(), inner_sequence], + Length(60), //20 bytes from study, 10 from series + )), + ); + + let original_object = InMemDicomObject::from_element_iter(vec![ + DataElement::new(tags::SPECIFIC_CHARACTER_SET, VR::CS, "ISO_IR 100"), + outer_sequence, + ]); + + assert_eq!( + original_object + .get(some_tag) + .expect("object should be present") + .length(), + Length(60) + ); + + let mut changed_charset = original_object.clone(); + changed_charset.convert_to_utf8(); + assert!(changed_charset.charset_changed); + + use dicom_parser::dataset::DataToken as token; + let options = IntoTokensOptions::new(true); + let converted_tokens: Vec<_> = changed_charset.into_tokens_with_options(options).collect(); + + 
assert_eq!( + vec![ + token::ElementHeader(DataElementHeader { + tag: Tag(0x0008, 0x0005), + vr: VR::CS, + len: Length(10), + }), + token::PrimitiveValue("ISO_IR 192".into()), + token::SequenceStart { + tag: Tag(0x0018, 0x6011), + len: Length::UNDEFINED, + }, + token::ItemStart { + len: Length::UNDEFINED + }, + token::SequenceStart { + tag: Tag(0x0018, 0x6011), + len: Length::UNDEFINED, + }, + token::ItemStart { + len: Length::UNDEFINED + }, + token::ElementHeader(DataElementHeader { + tag: Tag(0x0008, 0x1030), + vr: VR::SL, + len: Length(22), + }), + token::PrimitiveValue("MORFOLOGÍA Y FUNCIÓN".into()), + token::ElementHeader(DataElementHeader { + tag: Tag(0x0008, 0x103E), + vr: VR::SL, + len: Length(10), + }), + token::PrimitiveValue("0123456789".into()), + token::ItemEnd, + token::SequenceEnd, + token::ItemEnd, + token::ItemStart { + len: Length::UNDEFINED + }, + token::SequenceStart { + tag: Tag(0x0018, 0x6011), + len: Length::UNDEFINED, + }, + token::ItemStart { + len: Length::UNDEFINED + }, + token::ElementHeader(DataElementHeader { + tag: Tag(0x0008, 0x1030), + vr: VR::SL, + len: Length(22), + }), + token::PrimitiveValue("MORFOLOGÍA Y FUNCIÓN".into()), + token::ElementHeader(DataElementHeader { + tag: Tag(0x0008, 0x103E), + vr: VR::SL, + len: Length(10), + }), + token::PrimitiveValue("0123456789".into()), + token::ItemEnd, + token::SequenceEnd, + token::ItemEnd, + token::SequenceEnd, + ], + converted_tokens + ); + } } diff --git a/object/src/meta.rs b/object/src/meta.rs index 1bf24c4c6..a928579b9 100644 --- a/object/src/meta.rs +++ b/object/src/meta.rs @@ -781,6 +781,8 @@ impl FileMetaTable { writer, EncoderFor::new(ExplicitVRLittleEndianEncoder::default()), ); + //There are no sequences in the `FileMetaTable`, so the value of `invalidate_sq_len` is + //not important dset.write_sequence( self.clone() .into_element_iter() diff --git a/object/src/tokens.rs b/object/src/tokens.rs index 5b9a1cecd..00170b687 100644 --- a/object/src/tokens.rs +++ 
b/object/src/tokens.rs @@ -1,7 +1,7 @@ //! Convertion of DICOM objects into tokens. use crate::mem::InMemDicomObject; use dicom_core::DataElement; -use dicom_parser::dataset::{DataToken, IntoTokens}; +use dicom_parser::dataset::{DataToken, IntoTokens, IntoTokensOptions}; use std::collections::VecDeque; /// A stream of tokens from a DICOM object. @@ -12,6 +12,8 @@ pub struct InMemObjectTokens { elem_iter: E, /// whether the tokens are done fused: bool, + /// Options to take into account when generating tokens + token_options: IntoTokensOptions, } impl InMemObjectTokens @@ -26,6 +28,19 @@ where tokens_pending: Default::default(), elem_iter: obj.into_iter(), fused: false, + token_options: Default::default(), + } + } + + pub fn new_with_options(obj: T, token_options: IntoTokensOptions) -> Self + where + T: IntoIterator, + { + InMemObjectTokens { + tokens_pending: Default::default(), + elem_iter: obj.into_iter(), + fused: false, + token_options, } } } @@ -49,8 +64,12 @@ where // otherwise, expand next element, recurse if let Some(elem) = self.elem_iter.next() { - // TODO eventually optimize this to be less eager - self.tokens_pending = elem.into_tokens().collect(); + self.tokens_pending = if self.token_options == Default::default() { + elem.into_tokens() + } else { + elem.into_tokens_with_options(self.token_options) + } + .collect(); self.next() } else { @@ -72,6 +91,12 @@ impl IntoTokens for InMemDicomObject { fn into_tokens(self) -> Self::Iter { InMemObjectTokens::new(self) } + + fn into_tokens_with_options(self, mut options: IntoTokensOptions) -> Self::Iter { + //This is required for recursing with the correct option + options.force_invalidate_sq_length |= self.charset_changed; + InMemObjectTokens::new_with_options(self, options) + } } impl<'a, D> IntoTokens for &'a InMemDicomObject @@ -82,6 +107,12 @@ where InMemObjectTokens as IntoIterator>::IntoIter>>; fn into_tokens(self) -> Self::Iter { - InMemObjectTokens::new(self.into_iter().cloned()) + 
self.into_tokens_with_options(Default::default()) + } + + fn into_tokens_with_options(self, mut options: IntoTokensOptions) -> Self::Iter { + options.force_invalidate_sq_length |= self.charset_changed; + + InMemObjectTokens::new_with_options(self.into_iter().cloned(), options) } } diff --git a/parser/src/dataset/mod.rs b/parser/src/dataset/mod.rs index 4b1191f68..b897219a5 100644 --- a/parser/src/dataset/mod.rs +++ b/parser/src/dataset/mod.rs @@ -4,6 +4,7 @@ use dicom_core::header::{DataElementHeader, HasLength, Length, VR}; use dicom_core::value::{DicomValueType, PrimitiveValue}; use dicom_core::{value::Value, DataElement, Tag}; use snafu::{OptionExt, ResultExt, Snafu}; +use std::default::Default; use std::fmt; pub mod lazy_read; @@ -27,6 +28,7 @@ pub enum Error { /// Unexpected undefined value length UndefinedLength, } + pub type Result = std::result::Result; /// A token of a DICOM data set stream. This is part of the interpretation of a @@ -415,13 +417,38 @@ pub enum SeqTokenType { Item, } +/// Options for token generation +#[derive(Debug, Copy, Clone, Default, Eq, PartialEq)] +#[non_exhaustive] +pub struct IntoTokensOptions { + /// Whether to ignore all sequence lengths in the DICOM data set, + /// resulting in sequences with undefined length. + /// + /// Set this to `true` when the sequence lengths in bytes might no longer be valid, + /// such as when changing the character set, + /// and as such data set sequence lengths should be replaced with undefined. + /// When set to `false`, + /// whether to retain or replace these lengths + /// is left at the implementation's discretion. + /// either be recalculated or marked as undefined. + pub force_invalidate_sq_length: bool, +} + +impl IntoTokensOptions { + pub fn new(force_invalidate_sq_length: bool) -> Self { + IntoTokensOptions { + force_invalidate_sq_length, + } + } +} + /// A trait for converting structured DICOM data into a stream of data tokens. 
pub trait IntoTokens { /// The iterator type through which tokens are obtained. type Iter: Iterator; - /// Convert the value into tokens. fn into_tokens(self) -> Self::Iter; + fn into_tokens_with_options(self, options: IntoTokensOptions) -> Self::Iter; } impl IntoTokens for dicom_core::header::EmptyObject { @@ -430,6 +457,10 @@ impl IntoTokens for dicom_core::header::EmptyObject { fn into_tokens(self) -> Self::Iter { unreachable!() } + + fn into_tokens_with_options(self, _options: IntoTokensOptions) -> Self::Iter { + unreachable!() + } } /// Token generator from a DICOM data element. @@ -442,6 +473,7 @@ where // Option is used for easy taking from a &mut, // should always be Some in practice Option>, + IntoTokensOptions, ), /// the header of a plain primitive element was read Header( @@ -489,10 +521,14 @@ where fn next(&mut self) -> Option { let (out, next_state) = match self { - DataElementTokens::Start(elem) => { + DataElementTokens::Start(elem, options) => { let elem = elem.take().unwrap(); // data element header token - let header = *elem.header(); + + let mut header = *elem.header(); + if options.force_invalidate_sq_length && elem.vr() == VR::SQ { + header.len = Length::UNDEFINED; + } let token = DataToken::from(header); match token { @@ -501,12 +537,23 @@ where match elem.into_value() { Value::Primitive(_) | Value::PixelSequence { .. 
} => unreachable!(), Value::Sequence(seq) => { + let seq = if options.force_invalidate_sq_length { + seq.into_items().into_vec().into() + } else { + seq + }; + let items: dicom_core::value::C<_> = seq .into_items() .into_iter() .map(|o| AsItem(o.length(), o)) .collect(); - (Some(token), DataElementTokens::Items(items.into_tokens())) + ( + Some(token), + DataElementTokens::Items( + items.into_tokens_with_options(*options), + ), + ) } } } @@ -519,7 +566,8 @@ where Some(DataToken::PixelSequenceStart), DataElementTokens::PixelData( Some(fragments), - OffsetTableItem(offset_table).into_tokens(), + OffsetTableItem(offset_table) + .into_tokens_with_options(*options), ), ) } @@ -591,7 +639,12 @@ where type Iter = DataElementTokens; fn into_tokens(self) -> Self::Iter { - DataElementTokens::Start(Some(self)) + //Avoid + self.into_tokens_with_options(Default::default()) + } + + fn into_tokens_with_options(self, options: IntoTokensOptions) -> Self::Iter { + DataElementTokens::Start(Some(self), options) } } @@ -601,6 +654,7 @@ where pub struct FlattenTokens { seq: O, tokens: Option, + into_token_options: IntoTokensOptions, } impl Iterator for FlattenTokens @@ -616,7 +670,7 @@ where if self.tokens.is_none() { match self.seq.next() { Some(entries) => { - self.tokens = Some(entries.into_tokens()); + self.tokens = Some(entries.into_tokens_with_options(self.into_token_options)); } None => return None, } @@ -641,9 +695,14 @@ where type Iter = FlattenTokens< as IntoIterator>::IntoIter, ::Iter>; fn into_tokens(self) -> Self::Iter { + self.into_tokens_with_options(Default::default()) + } + + fn into_tokens_with_options(self, into_token_options: IntoTokensOptions) -> Self::Iter { FlattenTokens { seq: self.into_iter(), tokens: None, + into_token_options, } } } @@ -656,9 +715,14 @@ where FlattenTokens< as IntoIterator>::IntoIter, ::Iter>; fn into_tokens(self) -> Self::Iter { + self.into_tokens_with_options(Default::default()) + } + + fn into_tokens_with_options(self, into_token_options: 
IntoTokensOptions) -> Self::Iter { FlattenTokens { seq: self.into_iter(), tokens: None, + into_token_options, } } } @@ -682,13 +746,18 @@ impl ItemTokens where T: Iterator, { - pub fn new(len: Length, object: O) -> Self + pub fn new(len: Length, object: O, options: IntoTokensOptions) -> Self where O: IntoTokens, { + let len = if len.0 != 0 && options.force_invalidate_sq_length { + Length::UNDEFINED + } else { + len + }; ItemTokens::Start { len, - object_tokens: Some(object.into_tokens()), + object_tokens: Some(object.into_tokens_with_options(options)), } } } @@ -737,7 +806,11 @@ where type Iter = ItemTokens; fn into_tokens(self) -> Self::Iter { - ItemTokens::new(self.0, self.1) + self.into_tokens_with_options(Default::default()) + } + + fn into_tokens_with_options(self, options: IntoTokensOptions) -> Self::Iter { + ItemTokens::new(self.0, self.1, options) } } @@ -761,14 +834,19 @@ where type Iter = ItemValueTokens
<P>
; fn into_tokens(self) -> Self::Iter { - ItemValueTokens::new(self.0) + self.into_tokens_with_options(Default::default()) + } + + fn into_tokens_with_options(self, options: IntoTokensOptions) -> Self::Iter { + ItemValueTokens::new(self.0, options) } } #[derive(Debug)] pub enum ItemValueTokens
<P>
{ - /// Just started, an item header token will come next - Start(Option
<P>
), + /// Just started, an item header token will come next. Takes a bool to configure if inner + /// lengths can be trusted to be valid + Start(Option
<P>
, bool), /// Will return a token of the value Value(P), /// Will return an end of item token @@ -779,8 +857,8 @@ pub enum ItemValueTokens
<P>
{ impl
<P>
ItemValueTokens
<P>
{ #[inline] - pub fn new(value: P) -> Self { - ItemValueTokens::Start(Some(value)) + pub fn new(value: P, into_tokens_options: IntoTokensOptions) -> Self { + ItemValueTokens::Start(Some(value), into_tokens_options.force_invalidate_sq_length) } } @@ -792,13 +870,18 @@ where fn next(&mut self) -> Option { let (out, next_state) = match self { - ItemValueTokens::Start(value) => { + ItemValueTokens::Start(value, invalidate_len) => { let value = value.take().unwrap(); - let len = Length(value.as_ref().len() as u32); + let end_item = value.as_ref().is_empty(); + let len = if *invalidate_len && !end_item { + Length::UNDEFINED + } else { + Length(value.as_ref().len() as u32) + }; ( Some(DataToken::ItemStart { len }), - if len == Length(0) { + if end_item { ItemValueTokens::Done } else { ItemValueTokens::Value(value) @@ -833,6 +916,11 @@ where type Iter = OffsetTableItemTokens
<P>
; fn into_tokens(self) -> Self::Iter { + self.into_tokens_with_options(Default::default()) + } + + fn into_tokens_with_options(self, _options: IntoTokensOptions) -> Self::Iter { + //There are no sequences here that might need to be invalidated OffsetTableItemTokens::new(self.0) } } @@ -897,7 +985,7 @@ mod tests { PrimitiveValue, Tag, VR, }; - use super::{DataToken, IntoTokens, LazyDataToken}; + use super::{DataToken, IntoTokens, IntoTokensOptions, LazyDataToken}; use smallvec::smallvec; use dicom_encoding::{ @@ -932,9 +1020,14 @@ mod tests { >; fn into_tokens(self) -> Self::Iter { + self.into_tokens_with_options(Default::default()) + } + + fn into_tokens_with_options(self, into_token_options: IntoTokensOptions) -> Self::Iter { super::FlattenTokens { seq: self.1.into_iter(), tokens: None, + into_token_options, } } }