Skip to content

Commit

Permalink
convert skip to iterate instead of call recursive functions (#111)
Browse files Browse the repository at this point in the history
  • Loading branch information
davidhewitt authored May 31, 2024
1 parent fc5a710 commit a4cf796
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 33 deletions.
1 change: 1 addition & 0 deletions crates/jiter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ ahash = "0.8.0"
smallvec = "1.11.0"
pyo3 = { version = "0.21.0", optional = true }
lexical-parse-float = { version = "0.8.5", features = ["format"] }
bitvec = "1.0.1"

[features]
python = ["dep:pyo3", "dep:pyo3-build-config"]
Expand Down
139 changes: 106 additions & 33 deletions crates/jiter/src/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,55 +234,128 @@ pub(crate) fn take_value_skip(
peek: Peek,
parser: &mut Parser,
tape: &mut Tape,
mut recursion_limit: u8,
recursion_limit: u8,
allow_inf_nan: bool,
) -> JsonResult<()> {
match peek {
Peek::True => parser.consume_true(),
Peek::False => parser.consume_false(),
Peek::Null => parser.consume_null(),
Peek::String => {
parser.consume_string::<StringDecoderRange>(tape, false)?;
Ok(())
}
Peek::String => parser.consume_string::<StringDecoderRange>(tape, false).map(drop),
Peek::Array => {
if let Some(peek_first) = parser.array_first()? {
check_recursion!(recursion_limit, parser.index,
take_value_skip(peek_first, parser, tape, recursion_limit, allow_inf_nan)?;
);
while let Some(peek) = parser.array_step()? {
check_recursion!(recursion_limit, parser.index,
take_value_skip(peek, parser, tape, recursion_limit, allow_inf_nan)?;
);
}
if let Some(next_peek) = parser.array_first()? {
take_value_skip_recursive(next_peek, ARRAY, parser, tape, recursion_limit, allow_inf_nan)
} else {
Ok(())
}
Ok(())
}
Peek::Object => {
if parser.object_first::<StringDecoderRange>(tape)?.is_some() {
let peek = parser.peek()?;
check_recursion!(recursion_limit, parser.index,
take_value_skip(peek, parser, tape, recursion_limit, allow_inf_nan)?;
);
while parser.object_step::<StringDecoderRange>(tape)?.is_some() {
let peek = parser.peek()?;
check_recursion!(recursion_limit, parser.index,
take_value_skip(peek, parser, tape, recursion_limit, allow_inf_nan)?;
);
}
take_value_skip_recursive(parser.peek()?, OBJECT, parser, tape, recursion_limit, allow_inf_nan)
} else {
Ok(())
}
Ok(())
}
_ => {
if let Err(e) = parser.consume_number::<NumberRange>(peek.into_inner(), allow_inf_nan) {
_ => parser
.consume_number::<NumberRange>(peek.into_inner(), allow_inf_nan)
.map(drop)
.map_err(|e| {
if !peek.is_num() {
Err(json_error!(ExpectedSomeValue, parser.index))
json_error!(ExpectedSomeValue, parser.index)
} else {
Err(e)
e
}
} else {
Ok(())
}),
}
}

const ARRAY: bool = false;
const OBJECT: bool = true;

#[inline(never)] // this is an iterative algo called only from take_value_skip, no point in inlining
fn take_value_skip_recursive(
mut peek: Peek,
mut current_recursion: bool,
parser: &mut Parser,
tape: &mut Tape,
recursion_limit: u8,
allow_inf_nan: bool,
) -> JsonResult<()> {
let mut recursion_stack = bitvec::bitarr![0; 256];
let recursion_limit: usize = recursion_limit.into();
let mut current_recursion_depth = 0;

macro_rules! push_recursion {
($next_peek:expr, $value:expr) => {
peek = $next_peek;
recursion_stack.set(
current_recursion_depth,
std::mem::replace(&mut current_recursion, $value),
);
current_recursion_depth += 1;
if current_recursion_depth >= recursion_limit {
return Err(json_error!(RecursionLimitExceeded, parser.index));
}
}
};
}

loop {
match peek {
Peek::True => parser.consume_true()?,
Peek::False => parser.consume_false()?,
Peek::Null => parser.consume_null()?,
Peek::String => {
parser.consume_string::<StringDecoderRange>(tape, false)?;
}
Peek::Array => {
if let Some(next_peek) = parser.array_first()? {
push_recursion!(next_peek, ARRAY);
// immediately jump to process the first value in the array
continue;
}
}
Peek::Object => {
if parser.object_first::<StringDecoderRange>(tape)?.is_some() {
push_recursion!(parser.peek()?, OBJECT);
// immediately jump to process the first value in the object
continue;
}
}
_ => {
parser
.consume_number::<NumberRange>(peek.into_inner(), allow_inf_nan)
.map_err(|e| {
if !peek.is_num() {
json_error!(ExpectedSomeValue, parser.index)
} else {
e
}
})?;
}
};

// now try to advance position in the current array or object
peek = loop {
match current_recursion {
ARRAY => {
if let Some(next_peek) = parser.array_step()? {
break next_peek;
}
}
OBJECT => {
if parser.object_step::<StringDecoderRange>(tape)?.is_some() {
break parser.peek()?;
}
}
}

current_recursion_depth = match current_recursion_depth.checked_sub(1) {
Some(r) => r,
// no recursion left, we are done
None => return Ok(()),
};

current_recursion = recursion_stack[current_recursion_depth];
};
}
}
36 changes: 36 additions & 0 deletions crates/jiter/tests/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -962,6 +962,42 @@ fn test_recursion_limit_incr() {
}
}

#[test]
fn test_recursion_limit_skip_array() {
let json = (0..2000).map(|_| "[ ").collect::<String>();
let bytes = json.as_bytes();
let mut jiter = Jiter::new(bytes);
let e = jiter.next_skip().unwrap_err();
assert_eq!(
e.error_type,
JiterErrorType::JsonError(JsonErrorType::RecursionLimitExceeded)
);
let expected_index = JsonValue::parse(bytes, false).unwrap_err().index;
assert_eq!(e.index, expected_index);
assert_eq!(
e.description(&jiter),
format!("recursion limit exceeded at line 1 column {}", expected_index + 1)
);
}

#[test]
fn test_recursion_limit_skip_object() {
let json = (0..2000).map(|_| "{\"a\": ").collect::<String>();
let bytes = json.as_bytes();
let mut jiter = Jiter::new(bytes);
let e = jiter.next_skip().unwrap_err();
assert_eq!(
e.error_type,
JiterErrorType::JsonError(JsonErrorType::RecursionLimitExceeded)
);
let expected_index = JsonValue::parse(bytes, false).unwrap_err().index;
assert_eq!(e.index, expected_index);
assert_eq!(
e.description(&jiter),
format!("recursion limit exceeded at line 1 column {}", expected_index + 1)
);
}

macro_rules! number_bytes {
($($name:ident: $json:literal => $expected:expr;)*) => {
$(
Expand Down

0 comments on commit a4cf796

Please sign in to comment.