Skip to content

Commit

Permalink
fix(lexer): correctly error when eof is reached before a multiline comment is terminated
Browse files Browse the repository at this point in the history

Signed-off-by: azjezz <azjezz@protonmail.com>
  • Loading branch information
azjezz committed Dec 18, 2024
1 parent 5b5b6e6 commit 7f80d83
Showing 3 changed files with 52 additions and 1 deletion.
3 changes: 3 additions & 0 deletions crates/lexer/src/error.rs
Original file line number Diff line number Diff line change
@@ -12,13 +12,15 @@ use mago_span::Span;
/// Errors the lexer can report while tokenizing input.
///
/// Every variant carries the `Position` at which the problem was detected;
/// that position is used as the start of the error's span.
pub enum SyntaxError {
/// A byte that was not expected at this point. Holds the offending byte
/// (rendered both as a character and as hex in the error message) and
/// where it was found.
UnexpectedToken(u8, Position),
/// A byte the lexer does not recognise. Holds the offending byte and
/// where it was found.
UnrecognizedToken(u8, Position),
/// The input ended before a construct was complete, e.g. a `/* ...`
/// comment that is never closed with `*/`. Holds the position reached
/// once the remaining input was consumed.
UnexpectedEndOfFile(Position),
}

impl HasSpan for SyntaxError {
fn span(&self) -> Span {
let position = match self {
Self::UnexpectedToken(_, p) => *p,
Self::UnrecognizedToken(_, p) => *p,
Self::UnexpectedEndOfFile(p) => *p,
};

Span::new(position, Position { offset: position.offset + 1, ..position })
@@ -30,6 +32,7 @@ impl std::fmt::Display for SyntaxError {
let message = match self {
Self::UnexpectedToken(token, _) => &format!("Unexpected token `{}` (0x{:02X})", *token as char, token),
Self::UnrecognizedToken(token, _) => &format!("Unrecognised token `{}` (0x{:02X})", *token as char, token),
Self::UnexpectedEndOfFile(_) => "Unexpected end of file",
};

write!(f, "{}", message)
8 changes: 8 additions & 0 deletions crates/lexer/src/lib.rs
Original file line number Diff line number Diff line change
@@ -278,6 +278,7 @@ impl<'a, 'i> Lexer<'a, 'i> {
[b'/', b'*', asterisk] => {
let mut length = 2;
let mut is_multiline = false;
let mut terminated = false;
loop {
match self.input.peek(length, 2) {
[b'*', b'/'] => {
@@ -287,6 +288,7 @@ impl<'a, 'i> Lexer<'a, 'i> {

length += 2;

terminated = true;
break;
}
[_, ..] => {
@@ -298,6 +300,12 @@ impl<'a, 'i> Lexer<'a, 'i> {
}
}

if !terminated {
self.input.consume(length);

return Some(Err(SyntaxError::UnexpectedEndOfFile(self.input.position())));
}

if !is_multiline && asterisk == &b'*' {
(TokenKind::DocBlockComment, length)
} else {
42 changes: 41 additions & 1 deletion crates/lexer/tests/tokenizer.rs
Original file line number Diff line number Diff line change
@@ -65,6 +65,34 @@ fn test_empty_multiline_comments() -> Result<(), SyntaxError> {
})
}

/// A `/* ...` comment that never reaches `*/` must produce an
/// `UnexpectedEndOfFile` error positioned at the end of the input, after the
/// open tag and the whitespace have been tokenized normally.
#[test]
fn test_unterminated_multiple_comment() {
    let source = b"<?php /* hello";
    let kinds_before_failure = vec![TokenKind::OpenTag, TokenKind::Whitespace];

    let Err(failure) = test_lexer(source, kinds_before_failure) else {
        panic!("expected error");
    };

    match failure {
        // The input is 14 bytes long, so the error sits at its very end.
        SyntaxError::UnexpectedEndOfFile(position) => assert_eq!(position.offset, 14),
        err => panic!("unexpected error: {}", err),
    }
}

/// A `/** ...` docblock that never reaches `*/` must produce an
/// `UnexpectedEndOfFile` error positioned at the end of the input, after the
/// open tag and the whitespace have been tokenized normally.
#[test]
fn test_unterminated_docblock_comment() {
    let source = b"<?php /** hello";
    let kinds_before_failure = vec![TokenKind::OpenTag, TokenKind::Whitespace];

    let Err(failure) = test_lexer(source, kinds_before_failure) else {
        panic!("expected error");
    };

    match failure {
        // The input is 15 bytes long, so the error sits at its very end.
        SyntaxError::UnexpectedEndOfFile(position) => assert_eq!(position.offset, 15),
        err => panic!("unexpected error: {}", err),
    }
}

#[test]
fn test_namespace() -> Result<(), SyntaxError> {
let code = b"<?php use Foo\\{Bar, Baz}";
@@ -707,13 +735,25 @@ fn test_lexer(code: &[u8], expected_kinds: Vec<TokenKind>) -> Result<(), SyntaxE
let mut lexer = Lexer::new(&interner, input);

let mut tokens = Vec::new();
let mut error = None;
while let Some(result) = lexer.advance() {
let token = result?;
let token = match result {
Ok(token) => token,
Err(err) => {
error = Some(err);

break;
}
};

tokens.push(token);
}

assert_eq!(expected_kinds, tokens.iter().map(|t| t.kind).collect::<Vec<_>>());
if let Some(err) = error {
return Err(err);
}

let mut found = String::new();
for token in tokens.iter() {
found.push_str(interner.lookup(&token.value));

0 comments on commit 7f80d83

Please sign in to comment.