Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds experimental-location feature flag #910

Merged
merged 2 commits into from
Feb 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ rust-version = "1.80"
default = []
experimental-ion-hash = ["digest", "experimental-reader-writer"]

# Access location information of the input Ion from the underlying buffer.
source-location = []

# Feature for indicating particularly bleeding edge APIs or functionality in the library.
# These are not guaranteed any sort of API stability and may also have non-standard
# Ion behavior (e.g., draft Ion 1.1 capabilities).
Expand Down
30 changes: 26 additions & 4 deletions src/lazy/text/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,10 @@ pub struct TextBuffer<'top> {
// offset: 6
data: &'top [u8],
offset: usize,
#[cfg(feature = "source-location")]
// `row` is the row position of the input data in this buffer.
row: usize,
#[cfg(feature = "source-location")]
// `prev_newline_offset` is the previously encountered newline byte's offset value.
// This is useful when calculating the column position of the input data in this buffer.
prev_newline_offset: usize,
Expand Down Expand Up @@ -148,7 +150,9 @@ impl<'top> TextBuffer<'top> {
context,
data,
offset,
#[cfg(feature = "source-location")]
row: 1,
#[cfg(feature = "source-location")]
prev_newline_offset: 0,
is_final_data,
}
Expand Down Expand Up @@ -215,12 +219,14 @@ impl<'top> TextBuffer<'top> {
self.offset
}

#[cfg(feature = "source-location")]
/// Returns the row position for this buffer.
///
/// Only available when the `source-location` feature is enabled.
///
/// _Note: Row positions are calculated based on the newline characters `\n` and `\r`. A `\r\n` pair, in that order, is counted as a single newline._
pub fn row(&self) -> usize {
self.row
}

#[cfg(feature = "source-location")]
/// Returns the column position for this buffer.
/// _Note: Column positions are calculated based on current offset and previous newline byte offset._
pub fn column(&self) -> usize {
Expand Down Expand Up @@ -267,10 +273,12 @@ impl<'top> TextBuffer<'top> {
/// Matches a run of one or more whitespace bytes at the head of the buffer.
///
/// Any error from the underlying parser is propagated to the caller. When the
/// `source-location` feature is enabled, the matched bytes are also passed to
/// `update_location_metadata` before the match is returned.
pub fn match_whitespace1(&mut self) -> IonMatchResult<'top> {
    let whitespace = take_while(1.., WHITESPACE_BYTES).parse_next(self)?;
    #[cfg(feature = "source-location")]
    self.update_location_metadata(whitespace.data);
    Ok(whitespace)
}

#[cfg(feature = "source-location")]
/// Updates the location metadata based on the matched whitespace bytes in the consumed buffer
fn update_location_metadata(&mut self, data: &'top [u8]) {
if !data.is_empty() {
Expand Down Expand Up @@ -304,6 +312,7 @@ impl<'top> TextBuffer<'top> {
/// Matches a (possibly empty) run of whitespace bytes at the head of the buffer.
///
/// Any error from the underlying parser is propagated to the caller. When the
/// `source-location` feature is enabled, the matched bytes are also passed to
/// `update_location_metadata` before the match is returned.
pub fn match_whitespace0(&mut self) -> IonMatchResult<'top> {
    let whitespace = take_while(0.., WHITESPACE_BYTES).parse_next(self)?;
    #[cfg(feature = "source-location")]
    self.update_location_metadata(whitespace.data);
    Ok(whitespace)
}
Expand Down Expand Up @@ -362,6 +371,7 @@ impl<'top> TextBuffer<'top> {
)
.take()
.parse_next(self)?;
#[cfg(feature = "source-location")]
self.update_location_metadata(result.data);
Ok(result)
}
Expand Down Expand Up @@ -1652,6 +1662,8 @@ impl<'top> TextBuffer<'top> {
// If the input doesn't contain one, this will return an `Incomplete`.
// `match_text_until_unescaped` does NOT include the delimiter byte in the match,
// so `remaining_after_match` starts at the delimiter byte.
// Note: `matched_input_buffer` is only read when the `source-location` feature is enabled, so suppress the unused-variable warning for builds without it.
#[allow(unused_variables)]
let (matched_input_buffer, segment_contained_escapes) =
remaining.match_text_until_unescaped(delimiter_head, true)?;
contained_escapes |= segment_contained_escapes;
Expand All @@ -1662,6 +1674,7 @@ impl<'top> TextBuffer<'top> {
let matched_input = self.slice(0, relative_match_end);
self.consume(relative_match_end);
// The matched input may contain newline characters, so update the location metadata.
#[cfg(feature = "source-location")]
self.update_location_metadata(matched_input_buffer.bytes());
return Ok((matched_input, contained_escapes));
} else {
Expand Down Expand Up @@ -2050,12 +2063,16 @@ impl<'data> Stream for TextBuffer<'data> {
}

/// Rewinds this buffer to a previously captured `checkpoint` by overwriting the
/// whole buffer with the checkpoint's value.
///
/// When the `source-location` feature is enabled, the `row` and
/// `prev_newline_offset` values held just before the rewind are written back
/// afterwards, so the location metadata is not rolled back to the checkpoint.
fn reset(&mut self, checkpoint: &Self::Checkpoint) {
    // `row` and `prev_newline_offset` only exist when `source-location` is enabled,
    // so every access to them must sit behind the same feature gate.
    #[cfg(feature = "source-location")]
    let (current_row, prev_column_value) = (self.row, self.prev_newline_offset);

    *self = *checkpoint;

    #[cfg(feature = "source-location")]
    {
        self.row = current_row;
        self.prev_newline_offset = prev_column_value;
    }
}

fn raw(&self) -> &dyn Debug {
Expand Down Expand Up @@ -2248,6 +2265,7 @@ mod tests {
);
}

#[cfg(feature = "source-location")]
fn expect_match_location<'data, P, O>(&'data self, parser: P, expected_location: (usize, usize))
where
P: Parser<TextBuffer<'data>, O, IonParseError<'data>>,
Expand All @@ -2264,6 +2282,7 @@ mod tests {
self.input,
&self.input[..match_length]
);

// Assert the location metadata
assert_eq!(expected_location, (result.0.row(), result.0.column()));
}
Expand Down Expand Up @@ -2928,6 +2947,7 @@ mod tests {
#[case::tabs_after_newline_2("\t\n\t\t", (2,3))]
#[case::tabs_after_newline_3("\n\t\n\t", (3,2))]
#[case::mix_tabs_and_newlines("\n\t\n", (3,1))]
#[cfg(feature = "source-location")]
fn expect_whitespace(#[case] input: &str, #[case] expected_location: (usize, usize)) {
    // Run the zero-or-more whitespace matcher and check the resulting (row, column).
    let test = MatchTest::new_1_0(input);
    test.expect_match_location(
        match_length(TextBuffer::match_whitespace0),
        expected_location,
    );
}
Expand All @@ -2943,6 +2963,7 @@ mod tests {
#[case::newline_after_comment("/*comment*/\n", (2,1))]
#[case::newline_inside_comment("/*multiline \n comment*/", (2,11))]
#[case::newlines_inside_comment("/*this is a \n multiline \n comment*/", (3,11))]
#[cfg(feature = "source-location")]
fn expect_whitespace_with_comment(#[case] input: &str, #[case] expected_location: (usize, usize)) {
    // Run the comments-and-whitespace matcher and check the resulting (row, column).
    let test = MatchTest::new_1_0(input);
    test.expect_match_location(
        match_length(TextBuffer::match_optional_comments_and_whitespace),
        expected_location,
    );
}
Expand All @@ -2957,6 +2978,7 @@ mod tests {
#[case::two_segment_with_newlines("'''long\n''' '''string\n'''", (3, 4))]
#[case::two_segment_long_string_mixed("'''long\n''' \n '''string\n'''", (4, 4))]
#[case::single_segment_with_whitespace("'''long \n\r\n\t hello'''", (3, 11))]
#[cfg(feature = "source-location")]
fn expect_newline_long_text(#[case] input: &str, #[case] expected_location: (usize, usize)) {
    // Run the string matcher over the input and check the resulting (row, column).
    let test = MatchTest::new_1_0(input);
    test.expect_match_location(
        match_length(TextBuffer::match_string),
        expected_location,
    );
}
Expand Down
Loading