Skip to content

Commit

Permalink
feat: consistent index result output
Browse files Browse the repository at this point in the history
The `--result bytes` mode now consistently reports the index of the first byte of each value it matched. This index can be used to extract the actual value from the JSON by starting to parse at the reported byte.

Ref: #161
  • Loading branch information
V0ldek committed Jun 19, 2023
1 parent cda16b5 commit d433876
Show file tree
Hide file tree
Showing 13 changed files with 508 additions and 101 deletions.
3 changes: 3 additions & 0 deletions crates/rsonpath-lib/src/engine/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ pub enum EngineError {
/// closing characters.
#[error("Malformed input JSON; end of input was reached, but unmatched opening characters remained.")]
MissingClosingCharacter(),
/// The engine found a query match, but no value associated with it.
#[error("Malformed input JSON; a query match was found, but there was no associated value")]
MissingItem(),
/// An error occurred when trying to parse a member name terminated by a particular colon character.
/// The inner [`usize`] value should be set to the byte index of the colon.
#[error(
Expand Down
33 changes: 19 additions & 14 deletions crates/rsonpath-lib/src/engine/head_skipping.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
//! Engine decorator that performs **head skipping** – an extremely optimized search for
//! the first matching member name in a query starting with a self-looping state.
//! This happens in queries starting with a descendant selector.
use super::error::EngineError;
use crate::debug;
use crate::query::{
automaton::{Automaton, State},
JsonString,
};
use crate::result::QueryResult;
use crate::FallibleIterator;
use crate::BLOCK_SIZE;
use crate::{
classification::{
quotes::{classify_quoted_sequences, QuoteClassifiedIterator},
structural::{resume_structural_classification, Structural, StructuralIterator},
ResumeClassifierState,
},
debug,
engine::EngineError,
input::Input,
query::{
automaton::{Automaton, State},
JsonString,
},
result::{NodeTypeHint, QueryResult, QueryResultBuilder},
FallibleIterator, BLOCK_SIZE,
};

/// Trait that needs to be implemented by an [`Engine`](`super::Engine`) to use this submodule.
Expand All @@ -34,16 +33,17 @@ pub(super) trait CanHeadSkip<'b, I: Input, const N: usize> {
/// and execute the query until a matching [`Structural::Closing`] character is encountered,
/// using `classifier` for classification and `result` for reporting query results. The `classifier`
/// must *not* be used to classify anything past the matching [`Structural::Closing`] character.
fn run_on_subtree<'r, R, Q, S>(
fn run_on_subtree<'r, B, R, Q, S>(
&mut self,
next_event: Structural,
state: State,
structural_classifier: S,
result: &'r mut R,
result: &'r mut B,
) -> Result<ResumeClassifierState<'b, I, Q, N>, EngineError>
where
I: Input,
Q: QuoteClassifiedIterator<'b, I, N>,
B: QueryResultBuilder<'b, I, R>,
R: QueryResult,
S: StructuralIterator<'b, I, Q, N>;
}
Expand Down Expand Up @@ -104,10 +104,15 @@ impl<'b, 'q, I: Input> HeadSkip<'b, 'q, I, BLOCK_SIZE> {

/// Run a preconfigured [`HeadSkip`] using the given `engine` and reporting
/// to the `result`.
pub(super) fn run_head_skipping<'r, E: CanHeadSkip<'b, I, BLOCK_SIZE>, R: QueryResult>(
pub(super) fn run_head_skipping<
'r,
E: CanHeadSkip<'b, I, BLOCK_SIZE>,
B: QueryResultBuilder<'b, I, R>,
R: QueryResult,
>(
&self,
engine: &mut E,
result: &'r mut R,
result: &'r mut B,
) -> Result<(), EngineError> {
let mut classifier_state = ResumeClassifierState {
iter: classify_quoted_sequences(self.bytes),
Expand All @@ -134,7 +139,7 @@ impl<'b, 'q, I: Input> HeadSkip<'b, 'q, I, BLOCK_SIZE> {
classifier_state.offset_bytes(distance as isize)?;

if self.is_accepting {
result.report(colon_idx);
result.report(colon_idx, NodeTypeHint::Any)?;
}

// Check if the colon is marked as within quotes.
Expand Down
87 changes: 49 additions & 38 deletions crates/rsonpath-lib/src/engine/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,24 @@
//! even on targets that do not support AVX2 SIMD operations.
#[cfg(feature = "head-skip")]
use super::head_skipping::{CanHeadSkip, HeadSkip};
use super::Compiler;
#[cfg(feature = "head-skip")]
use crate::classification::ResumeClassifierState;
use crate::debug;
use crate::engine::depth::Depth;
use crate::engine::error::EngineError;
#[cfg(feature = "tail-skip")]
use crate::engine::tail_skipping::TailSkip;
use crate::engine::{Engine, Input};
use crate::query::automaton::{Automaton, State};
use crate::query::error::CompilerError;
use crate::query::{JsonPathQuery, JsonString, NonNegativeArrayIndex};
use crate::result::QueryResult;
use crate::FallibleIterator;
use crate::BLOCK_SIZE;
use crate::{
classification::{
quotes::{classify_quoted_sequences, QuoteClassifiedIterator},
structural::{classify_structural_characters, BracketType, Structural, StructuralIterator},
},
query::automaton::TransitionLabel,
debug,
engine::{depth::Depth, error::EngineError, Compiler, Engine, Input},
query::{
automaton::{Automaton, State, TransitionLabel},
error::CompilerError,
JsonPathQuery, JsonString, NonNegativeArrayIndex,
},
result::{NodeTypeHint, QueryResult, QueryResultBuilder},
FallibleIterator, BLOCK_SIZE,
};
use smallvec::{smallvec, SmallVec};

Expand Down Expand Up @@ -65,24 +62,24 @@ impl Engine for MainEngine<'_> {
return empty_query(input);
}

let mut result = R::default();
let mut result = R::Builder::new(input);
let executor = query_executor(&self.automaton, input);
executor.run(&mut result)?;

Ok(result)
Ok(result.finish())
}
}

fn empty_query<I: Input, R: QueryResult>(bytes: &I) -> Result<R, EngineError> {
let quote_classifier = classify_quoted_sequences(bytes);
let mut block_event_source = classify_structural_characters(quote_classifier);
let mut result = R::default();
let mut result = R::Builder::new(bytes);

if let Some(Structural::Opening(_, idx)) = block_event_source.next()? {
result.report(idx);
result.report(idx, NodeTypeHint::AnyComplex)?;
}

Ok(result)
Ok(result.finish())
}

#[cfg(feature = "tail-skip")]
Expand Down Expand Up @@ -128,7 +125,7 @@ fn query_executor<'q, 'b, I: Input>(automaton: &'b Automaton<'q>, bytes: &'b I)

impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
#[cfg(feature = "head-skip")]
fn run<R: QueryResult>(mut self, result: &mut R) -> Result<(), EngineError> {
fn run<B: QueryResultBuilder<'b, I, R>, R: QueryResult>(mut self, result: &mut B) -> Result<(), EngineError> {
let mb_head_skip = HeadSkip::new(self.bytes, self.automaton);

match mb_head_skip {
Expand All @@ -138,11 +135,14 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
}

#[cfg(not(feature = "head-skip"))]
fn run<R: QueryResult>(self, result: &mut R) -> Result<(), EngineError> {
fn run<B: QueryResultBuilder<'b, I, R>, R: QueryResult>(self, result: &mut B) -> Result<(), EngineError> {
self.run_and_exit(result)
}

fn run_and_exit<R: QueryResult>(mut self, result: &mut R) -> Result<(), EngineError> {
fn run_and_exit<B: QueryResultBuilder<'b, I, R>, R: QueryResult>(
mut self,
result: &mut B,
) -> Result<(), EngineError> {
let quote_classifier = classify_quoted_sequences(self.bytes);
let structural_classifier = classify_structural_characters(quote_classifier);
#[cfg(feature = "tail-skip")]
Expand All @@ -158,11 +158,12 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
fn run_on_subtree<
Q: QuoteClassifiedIterator<'b, I, BLOCK_SIZE>,
S: StructuralIterator<'b, I, Q, BLOCK_SIZE>,
B: QueryResultBuilder<'b, I, R>,
R: QueryResult,
>(
&mut self,
classifier: &mut Classifier!(),
result: &mut R,
result: &mut B,
) -> Result<(), EngineError> {
loop {
if self.next_event.is_none() {
Expand Down Expand Up @@ -197,15 +198,16 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
Ok(())
}

fn handle_colon<Q, S, R>(
fn handle_colon<Q, S, B, R>(
&mut self,
classifier: &mut Classifier!(),
idx: usize,
result: &mut R,
result: &mut B,
) -> Result<(), EngineError>
where
Q: QuoteClassifiedIterator<'b, I, BLOCK_SIZE>,
S: StructuralIterator<'b, I, Q, BLOCK_SIZE>,
B: QueryResultBuilder<'b, I, R>,
R: QueryResult,
{
debug!("Colon");
Expand All @@ -221,7 +223,7 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
TransitionLabel::ArrayIndex(_) => {}
TransitionLabel::ObjectMember(member_name) => {
if self.automaton.is_accepting(target) && self.is_match(idx, member_name)? {
result.report(idx);
result.report(idx, NodeTypeHint::Atomic /* since is_next_opening is false */)?;
any_matched = true;
break;
}
Expand All @@ -230,7 +232,7 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
}
let fallback_state = self.automaton[self.state].fallback_state();
if !any_matched && self.automaton.is_accepting(fallback_state) {
result.report(idx);
result.report(idx, NodeTypeHint::Atomic /* since is_next_opening is false */)?;
}
#[cfg(feature = "unique-members")]
{
Expand All @@ -247,15 +249,16 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
Ok(())
}

fn handle_comma<Q, S, R>(
fn handle_comma<Q, S, B, R>(
&mut self,
classifier: &mut Classifier!(),
idx: usize,
result: &mut R,
result: &mut B,
) -> Result<(), EngineError>
where
Q: QuoteClassifiedIterator<'b, I, BLOCK_SIZE>,
S: StructuralIterator<'b, I, Q, BLOCK_SIZE>,
B: QueryResultBuilder<'b, I, R>,
R: QueryResult,
{
self.next_event = classifier.next()?;
Expand All @@ -266,7 +269,7 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {

if !is_next_opening && self.is_list && is_fallback_accepting {
debug!("Accepting on comma.");
result.report(idx);
result.report(idx, NodeTypeHint::Atomic /* since is_next_opening is false */)?;
}

// After wildcard, check for a matching array index.
Expand All @@ -282,22 +285,23 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {

if !is_next_opening && match_index {
debug!("Accepting on list item.");
result.report(idx);
result.report(idx, NodeTypeHint::Atomic /* since is_next_opening is false */)?;
}

Ok(())
}

fn handle_opening<Q, S, R>(
fn handle_opening<Q, S, B, R>(
&mut self,
classifier: &mut Classifier!(),
bracket_type: BracketType,
idx: usize,
result: &mut R,
result: &mut B,
) -> Result<(), EngineError>
where
Q: QuoteClassifiedIterator<'b, I, BLOCK_SIZE>,
S: StructuralIterator<'b, I, Q, BLOCK_SIZE>,
B: QueryResultBuilder<'b, I, R>,
R: QueryResult,
{
debug!("Opening {bracket_type:?}, increasing depth and pushing stack.",);
Expand All @@ -313,7 +317,7 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
self.transition_to(target, bracket_type);
if self.automaton.is_accepting(target) {
debug!("Accept {idx}");
result.report(idx);
result.report(idx, NodeTypeHint::Complex(bracket_type))?;
}
break;
}
Expand All @@ -324,7 +328,7 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
any_matched = true;
self.transition_to(target, bracket_type);
if self.automaton.is_accepting(target) {
result.report(colon_idx);
result.report(colon_idx, NodeTypeHint::Complex(bracket_type))?;
}
break;
}
Expand All @@ -348,7 +352,7 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
self.transition_to(fallback, bracket_type);

if self.automaton.is_accepting(fallback) {
result.report(idx);
result.report(idx, NodeTypeHint::Complex(bracket_type))?;
}
}

Expand Down Expand Up @@ -378,12 +382,18 @@ impl<'q, 'b, I: Input> Executor<'q, 'b, I> {
Some(Structural::Closing(_, close_idx)) => {
if let Some((next_idx, _)) = self.bytes.seek_non_whitespace_forward(idx + 1)? {
if next_idx < close_idx {
result.report(next_idx);
result.report(
next_idx,
NodeTypeHint::Atomic, /* since the next structural is the closing of the list */
)?;
}
}
}
Some(Structural::Comma(_)) => {
result.report(idx + 1);
result.report(
idx + 1,
NodeTypeHint::Atomic, /* since the next structural is a ','*/
)?;
}
_ => (),
}
Expand Down Expand Up @@ -589,15 +599,16 @@ impl SmallStack {

#[cfg(feature = "head-skip")]
impl<'q, 'b, I: Input> CanHeadSkip<'b, I, BLOCK_SIZE> for Executor<'q, 'b, I> {
fn run_on_subtree<'r, R, Q, S>(
fn run_on_subtree<'r, B, R, Q, S>(
&mut self,
next_event: Structural,
state: State,
structural_classifier: S,
result: &'r mut R,
result: &'r mut B,
) -> Result<ResumeClassifierState<'b, I, Q, BLOCK_SIZE>, EngineError>
where
Q: QuoteClassifiedIterator<'b, I, BLOCK_SIZE>,
B: QueryResultBuilder<'b, I, R>,
R: QueryResult,
S: StructuralIterator<'b, I, Q, BLOCK_SIZE>,
{
Expand Down
Loading

0 comments on commit d433876

Please sign in to comment.