Skip to content

Commit

Permalink
Fix #127 integer parsing
Browse files Browse the repository at this point in the history
Signed-off-by: Heinz N. Gies <heinz@licenser.net>
  • Loading branch information
Licenser authored and mfelsche committed Mar 22, 2022
1 parent 76ed6c3 commit dd1149a
Show file tree
Hide file tree
Showing 12 changed files with 34 additions and 54 deletions.
4 changes: 1 addition & 3 deletions src/preprocessor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -673,10 +673,8 @@ mod test {
Ok(())
}

const LOOKUP_TABLE: [&str; 17] = [
const LOOKUP_TABLE: [&str; 15] = [
"lines",
"lines-null",
"lines-pipe",
"base64",
"gzip",
"zlib",
Expand Down
2 changes: 1 addition & 1 deletion tests/script_errors/double_const/error.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Error:
2 | const one = "two";
| ^^^^^^^^^^^^^^^^^ Can't define the constant `one` twice
| ^^^^^^^^^^^^^^^^^ Can't define the const `one` twice
2 changes: 1 addition & 1 deletion tests/script_errors/lexer_invalid_hex2/error.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Error:
1 | -0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ An invalid hexadecimal
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ An invalid hexadecimal
2 changes: 1 addition & 1 deletion tests/script_errors/lexer_invalid_int/error.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Error:
1 | -161390617380431786853494948250188242145606612051826469551916209783790476376052574664352834580008614464743948248296718335
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ An invalid integer literal
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ An invalid integer literal
12 changes: 1 addition & 11 deletions tremor-script/lib/std/integer.tremor
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,4 @@ use std::integer::unsigned;

## Parses a string as an integer.
## Returns an `integer`.
intrinsic fn parse(string) as integer::parse;

## Minimum valid integer value.
##
## Same as `signed::min`
const min = (-9223372036854775807);

## Maximum valid integer value.
##
## Same as `signed::max`
const max = 9223372036854775807;
intrinsic fn parse(string) as integer::parse;
18 changes: 9 additions & 9 deletions tremor-script/lib/std/integer/signed.tremor
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
### Signed integer constants
### Signed integer constants

## Max signed value
const max = 9223372036854775807;
## Minimal signed value
##
## This is slightly wrong, it should be `-9223372036854775808`
## see https://github.com/tremor-rs/tremor-runtime/issues/127
const min = (-9223372036854775807);
## Max signed value
const max = 9223372036854775807;

## Minimal signed value
##
# see https://github.com/tremor-rs/tremor-runtime/issues/127
# note the lexer is limited in how we can represent min
const min = -9223372036854775808;
3 changes: 1 addition & 2 deletions tremor-script/lib/std/integer/unsigned.tremor
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
##! Unsigned integer constants

## Max unsigned value
## FIXME: This is a lexer issue that we can't make this as a u64 not a lexer type
const max = 9223372036854775807 * 2;
const max = 18446744073709551615;

## Minimal unsigned value
const min = 0;
2 changes: 1 addition & 1 deletion tremor-script/src/ast/raw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ impl Default for Endian {
pub struct BytesPartRaw<'script> {
pub data: ImutExprRaw<'script>,
pub data_type: IdentRaw<'script>,
pub bits: Option<i64>,
pub bits: Option<u64>,
pub(crate) mid: Box<NodeMeta>,
}
impl_expr!(BytesPartRaw);
Expand Down
1 change: 1 addition & 0 deletions tremor-script/src/ast/visitors/impls/const_folder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ impl<'run, 'script: 'run> ImutExprVisitor<'script> for ConstFolder<'run> {
.ok_or_else(|| {
let inner = b.extent();
let outer = b.extent();
dbg!(value);
err_invalid_unary(&outer, &inner, *kind, value)
})?
.into_owned();
Expand Down
2 changes: 1 addition & 1 deletion tremor-script/src/grammar.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -1587,7 +1587,7 @@ extern {
"~=" => Token::TildeEq,
"~" => Token::Tilde,
"bool" => Token::BoolLiteral(<bool>),
"int" => Token::IntLiteral(<i64>),
"int" => Token::IntLiteral(<u64>),
"float" => Token::FloatLiteral(<f64>, <String>),
"string" => Token::StringLiteral(<Cow<'input, str>>),
"heredoc_start" => Token::HereDocStart,
Expand Down
17 changes: 11 additions & 6 deletions tremor-script/src/interpreter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -456,12 +456,17 @@ pub(crate) fn exec_unary<'run, 'event: 'run>(
}
} else if let Some(x) = val.as_u64() {
match &op {
Minus => x
.try_into()
.ok()
.and_then(i64::checked_neg)
.map(Value::from)
.map(Cow::Owned),
Minus => {
if x == 9223372036854775808 {
Some(Cow::Owned(Value::from(i64::MIN)))
} else {
x.try_into()
.ok()
.and_then(i64::checked_neg)
.map(Value::from)
.map(Cow::Owned)
}
}
Plus => Some(Cow::Owned(Value::from(x))),
BitNot => Some(Cow::Owned(Value::from(!x))),
Not => None,
Expand Down
23 changes: 5 additions & 18 deletions tremor-script/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ pub enum Token<'input> {
/// a boolean
BoolLiteral(bool),
/// an integer
IntLiteral(i64),
IntLiteral(u64),
/// a float
FloatLiteral(f64, String),
/// a test literal
Expand Down Expand Up @@ -1982,7 +1982,7 @@ impl<'input> Lexer<'input> {
let (end, hex) = self.extract_number(int_start, is_hex);
// ALLOW: this takes the whole string and can not panic
match int {
"0" | "-0" => match self.lookahead() {
"0" => match self.lookahead() {
Some((_, ch)) if is_ident_start(ch) => Err(ErrorKind::UnexpectedCharacter(
Span::new(start, end).expand_lines(2),
Span::new(start, end),
Expand All @@ -2007,9 +2007,8 @@ impl<'input> Lexer<'input> {
)
.into())
} else {
let is_positive = int == "0";
// ALLOW: this takes the whole string and can not panic
match i64_from_hex(&hex[..], is_positive) {
match u64::from_str_radix(&hex[..], 16) {
Ok(val) => Ok(spanned(start, end, Token::IntLiteral(val))),
Err(_err) => Err(ErrorKind::InvalidHexLiteral(
Span::new(start, end).expand_lines(2),
Expand Down Expand Up @@ -2038,7 +2037,7 @@ impl<'input> Lexer<'input> {
}
}

/// handle numbers (with or without leading '-')
/// handle numbers
#[allow(clippy::too_many_lines)]
fn number(&mut self, start: Location) -> Result<TokenSpan<'input>> {
let (end, int) = self.extract_number(start, is_dec_digit);
Expand Down Expand Up @@ -2080,15 +2079,6 @@ impl<'input> Lexer<'input> {
}
}

/// Converts a partial hex literal (i.e. the part after `0x` or `-0x`) to a 64 bit signed integer.
///
/// `hex` holds the digits only; the sign is passed separately via `is_positive`.
/// Parsing is delegated to `i64::from_str_radix` with radix 16.
///
/// # Errors
///
/// Returns the parse error converted into this crate's error type when `hex` is not a valid
/// hexadecimal number or the signed value does not fit into an `i64`.
fn i64_from_hex(hex: &str, is_positive: bool) -> Result<i64> {
    let parsed = i64::from_str_radix(hex, 16);
    if is_positive {
        return Ok(parsed?);
    }

    let negated = match parsed {
        Ok(magnitude) => -magnitude,
        // The magnitude of `i64::MIN` (2^63) does not fit into a positive `i64`, so parsing the
        // digits on their own overflows. Retry with the sign attached so that
        // `-0x8000000000000000` is accepted; if that fails as well, report the original error.
        Err(err) => i64::from_str_radix(&format!("-{}", hex), 16).map_err(|_| err)?,
    };
    Ok(negated)
}

impl<'input> Iterator for Lexer<'input> {
type Item = Result<TokenSpan<'input>>;

Expand Down Expand Up @@ -2121,10 +2111,7 @@ impl<'input> Iterator for Lexer<'input> {
'^' => Some(Ok(spanned(start, start + ch, Token::BitXor))),
'&' => Some(Ok(spanned(start, start + ch, Token::BitAnd))),
':' => Some(Ok(self.colon(start))),
'-' => match self.lookahead() {
Some((_loc, c)) if is_dec_digit(c) => Some(self.number(start)),
_ => Some(Ok(spanned(start, start + ch, Token::Sub))),
},
'-' => Some(Ok(spanned(start, start + ch, Token::Sub))),
'#' => Some(Ok(self.comment(start))),
'=' => Some(Ok(self.eq(start))),
'<' => Some(Ok(self.lt(start))),
Expand Down

0 comments on commit dd1149a

Please sign in to comment.