-
Notifications
You must be signed in to change notification settings - Fork 37
Fixes more ion-tests for NativeReader
#377
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6d2fd0b
be5c92d
ad47dab
a84e870
58e0783
9f81927
66cfcb4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -295,7 +295,7 @@ impl<R: IonDataSource> StreamReader for RawBinaryReader<R> { | |
// If the cursor is nested inside a parent object, don't attempt to read beyond the end of | ||
// the parent. Users can call '.step_out()' to progress beyond the container. | ||
if self.cursor.bytes_read >= parent.value_end_exclusive() { | ||
return Ok(RawStreamItem::Nothing); | ||
return Ok(self.set_current_item(RawStreamItem::Nothing)); | ||
} | ||
} | ||
|
||
|
@@ -397,7 +397,11 @@ impl<R: IonDataSource> StreamReader for RawBinaryReader<R> { | |
} | ||
|
||
fn ion_type(&self) -> Option<IonType> { | ||
self.cursor.value.header.ion_type | ||
use RawStreamItem::*; | ||
match self.current() { | ||
Value(ion_type) | Null(ion_type) => Some(ion_type), | ||
_ => None, | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ |
||
} | ||
|
||
fn is_null(&self) -> bool { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,17 +2,19 @@ use crate::raw_symbol_token::RawSymbolToken; | |
use crate::text::parsers::comments::whitespace_or_comments; | ||
use nom::bytes::streaming::tag; | ||
use nom::character::streaming::multispace0; | ||
use nom::combinator::map_opt; | ||
use nom::combinator::{map_opt, opt}; | ||
use nom::multi::many1; | ||
use nom::sequence::{delimited, pair, preceded}; | ||
use nom::sequence::{delimited, pair, preceded, terminated}; | ||
use nom::IResult; | ||
|
||
use crate::text::parsers::symbol::parse_symbol; | ||
use crate::text::parsers::whitespace; | ||
use crate::text::text_value::TextValue; | ||
|
||
/// Matches a series of '::'-delimited symbols used to annotate a value. | ||
/// Matches a series of '::'-delimited symbols used to annotate a value. Trailing whitespace | ||
/// is permitted. | ||
pub(crate) fn parse_annotations(input: &str) -> IResult<&str, Vec<RawSymbolToken>> { | ||
many1(parse_annotation)(input) | ||
terminated(many1(parse_annotation), opt(whitespace))(input) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ Whitespace can now appear between the last annotation delimiter (
|
||
} | ||
|
||
/// Matches a single symbol of any format (foo, 'foo', or $10) followed by a '::' delimiter. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,11 @@ | ||
use crate::text::parsers::string::long_string_fragment_without_escaped_text; | ||
use crate::text::parsers::text_support::{ | ||
escaped_char_no_unicode, escaped_newline, StringFragment, | ||
}; | ||
use crate::text::parsers::whitespace; | ||
use crate::text::text_value::TextValue; | ||
use nom::branch::alt; | ||
use nom::bytes::streaming::{is_not, tag, take_until}; | ||
use nom::bytes::streaming::{is_not, tag}; | ||
use nom::character::streaming::char; | ||
use nom::combinator::{map, not, opt, peek, verify}; | ||
use nom::multi::{fold_many0, many1}; | ||
|
@@ -89,9 +90,9 @@ fn long_clob_fragment(input: &str) -> IResult<&str, StringFragment> { | |
|
||
/// Matches the next string fragment while respecting the long clob delimiter (`'''`). | ||
fn long_clob_fragment_without_escaped_text(input: &str) -> IResult<&str, StringFragment> { | ||
map(verify(take_until("'''"), |s: &str| !s.is_empty()), |text| { | ||
StringFragment::Substring(text) | ||
})(input) | ||
// Matches the head of the string up to the next `'''` or `\`. | ||
// Will not match if the `'''` or `\` is at the very beginning of the string. | ||
long_string_fragment_without_escaped_text(input) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ For the moment, this logic is identical to that used for long strings. This may change in a future PR as I believe there are additional escape sequence rules to consider for |
||
} | ||
|
||
/// Matches the body of a short clob. (The `hello` in `"hello"`.) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
use crate::text::parsers::comments::whitespace_or_comments; | ||
use crate::text::parsers::text_support::{escaped_char, escaped_newline, StringFragment}; | ||
use crate::text::parsers::whitespace; | ||
use crate::text::text_value::TextValue; | ||
use nom::branch::alt; | ||
use nom::bytes::streaming::{is_not, tag}; | ||
|
@@ -36,7 +36,7 @@ fn long_string(input: &str) -> IResult<&str, TextValue> { | |
terminated( | ||
many1(terminated( | ||
delimited(tag("'''"), long_string_body, tag("'''")), | ||
opt(whitespace), | ||
opt(whitespace_or_comments), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ Long strings can have comments between fragments.
|
||
)), | ||
peek(not(tag("'''"))), | ||
), | ||
|
@@ -71,7 +71,9 @@ fn long_string_fragment(input: &str) -> IResult<&str, StringFragment> { | |
|
||
/// Matches the next string fragment without escape sequences while also respecting the long | ||
/// string delimiter (`'''`). | ||
fn long_string_fragment_without_escaped_text(input: &str) -> IResult<&str, StringFragment> { | ||
pub(in crate::text::parsers) fn long_string_fragment_without_escaped_text( | ||
input: &str, | ||
) -> IResult<&str, StringFragment> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ |
||
// In contrast with the `short_string_fragment_without_escaped_text` function, this parser is | ||
// hand-written because has two possible 'end' sequences to detect: | ||
// 1. A slash (`\`), indicating the beginning of an escape sequence. | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -47,24 +47,35 @@ impl ElementReader for NativeElementReader { | |||||
} | ||||||
|
||||||
impl<R: RawReader> NativeElementIterator<R> { | ||||||
/// Recursively materialize the next Ion value in the stream and return it as `Ok(Some(element))`. | ||||||
/// If there are no more values at this level, returns `Ok(None)`. | ||||||
/// If an error occurs while materializing the value, returns an `Err`. | ||||||
/// Advances the reader to the next value in the stream and uses [Self::materialize_current] | ||||||
/// to materialize it. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🗺️ The original implementation of When the reader went to read struct fields, it would call I broke |
||||||
fn materialize_next(&mut self) -> IonResult<Option<OwnedElement>> { | ||||||
// Advance the reader to the next value | ||||||
let current_item = self.reader.next()?; | ||||||
let _ = self.reader.next()?; | ||||||
self.materialize_current() | ||||||
} | ||||||
|
||||||
/// Recursively materialize the reader's current Ion value and returns it as `Ok(Some(element))`. | ||||||
/// If there are no more values at this level, returns `Ok(None)`. | ||||||
/// If an error occurs while materializing the value, returns an `Err`. | ||||||
/// Calling this method advances the reader and consumes the current value. | ||||||
fn materialize_current(&mut self) -> IonResult<Option<OwnedElement>> { | ||||||
// Collect this item's annotations into a Vec. We have to do this before materializing the | ||||||
// value itself because materializing a collection requires advancing the reader further. | ||||||
let mut annotations = Vec::new(); | ||||||
for annotation in self.reader.annotations() { | ||||||
// If the annotation couldn't be resolved to text, early return the error. | ||||||
let annotation = annotation?; | ||||||
let symbol = owned::text_token(annotation.as_ref()); | ||||||
annotations.push(symbol); | ||||||
// Current API limitations require `self.reader.annotations()` to heap allocate its | ||||||
// iterator even if there aren't annotations. `self.reader.has_annotations()` is trivial | ||||||
// and allows us to skip the heap allocation in the common case. | ||||||
if self.reader.has_annotations() { | ||||||
for annotation in self.reader.annotations() { | ||||||
// If the annotation couldn't be resolved to text, early return the error. | ||||||
let annotation = annotation?; | ||||||
let symbol = owned::text_token(annotation.as_ref()); | ||||||
annotations.push(symbol); | ||||||
} | ||||||
} | ||||||
|
||||||
let value = match current_item { | ||||||
let value = match self.reader.current() { | ||||||
// No more values at this level of the stream | ||||||
StreamItem::Nothing => return Ok(None), | ||||||
// This is a typed null | ||||||
|
@@ -92,7 +103,6 @@ impl<R: RawReader> NativeElementIterator<R> { | |||||
} | ||||||
} | ||||||
}; | ||||||
|
||||||
Ok(Some(OwnedElement::new(annotations, value))) | ||||||
} | ||||||
|
||||||
|
@@ -115,9 +125,12 @@ impl<R: RawReader> NativeElementIterator<R> { | |||||
fn materialize_struct(&mut self) -> IonResult<OwnedStruct> { | ||||||
let mut child_elements = Vec::new(); | ||||||
self.reader.step_in()?; | ||||||
while let Some(element) = self.materialize_next()? { | ||||||
while let StreamItem::Value(_) | StreamItem::Null(_) = self.reader.next()? { | ||||||
let field = self.reader.field_name()?; | ||||||
child_elements.push((owned::text_token(field.as_ref()), element)); | ||||||
let value = self | ||||||
.materialize_current()? | ||||||
.expect("materialize_current() returned None for user data"); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Question:
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
No, it // v-- Here we confirm that there's definitely either a value or a null.
while let StreamItem::Value(_) | StreamItem::Null(_) = self.reader.next()? {
let field = self.reader.field_name()?;
child_elements.push((owned::text_token(field.as_ref()), element));
let value = self
.materialize_current()? // <-- here we use `?` to handle any errors
// At this point, there's definitely a value (or null) and there weren't any errors...
.expect("materialize_current() returned None for user data");
// ...so we can use `expect` to dismiss the `None` case. I agree that the error message could be clearer; I'll fix it in a follow-on PR to avoid complicating the rebases I need to do in other PRs. |
||||||
child_elements.push((owned::text_token(field.as_ref()), value)); | ||||||
} | ||||||
self.reader.step_out()?; | ||||||
Ok(OwnedStruct::from_iter(child_elements.into_iter())) | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🗺️ When the reader encountered the end of a container, it would return
StreamItem::Nothing
without recording that as the currentStreamItem
. This meant that later calls to methods likeion_type()
might operate on stale data.