From e542fe5c0a71cdf4fc0752e6c77de18f51aac2e9 Mon Sep 17 00:00:00 2001 From: DaniPopes <57450786+DaniPopes@users.noreply.github.com> Date: Fri, 2 Aug 2024 18:38:20 +0200 Subject: [PATCH] feat: escape import paths --- crates/parse/src/lexer/unescape/mod.rs | 16 ++++++++---- crates/parse/src/parser/lit.rs | 7 +++--- crates/sema/src/parse.rs | 35 ++++++++++++++++++++++---- tests/ui/parser/escaped_import.sol | 1 + tests/ui/parser/escaped_import.stderr | 9 +++++++ 5 files changed, 55 insertions(+), 13 deletions(-) create mode 100644 tests/ui/parser/escaped_import.sol create mode 100644 tests/ui/parser/escaped_import.stderr diff --git a/crates/parse/src/lexer/unescape/mod.rs b/crates/parse/src/lexer/unescape/mod.rs index 1fcb0748..23239eba 100644 --- a/crates/parse/src/lexer/unescape/mod.rs +++ b/crates/parse/src/lexer/unescape/mod.rs @@ -20,8 +20,14 @@ pub enum Mode { } /// Parses a string literal (without quotes) into a byte array. -#[instrument(level = "debug", skip_all)] -pub fn parse_string_literal(src: &str, mode: Mode, f: F) -> Vec +pub fn parse_string_literal(src: &str, mode: Mode) -> Cow<'_, [u8]> { + try_parse_string_literal(src, mode, |_, _| {}) +} + +/// Parses a string literal (without quotes) into a byte array. +/// `f` is called for each escape error. +#[instrument(name = "parse_string_literal", level = "debug", skip_all)] +pub fn try_parse_string_literal(src: &str, mode: Mode, f: F) -> Cow<'_, [u8]> where F: FnMut(Range, EscapeError), { @@ -36,7 +42,7 @@ where bytes = Cow::Owned(decoded); } } - bytes.into_owned() + bytes } #[cold] @@ -58,7 +64,7 @@ where debug_assert!(dst_buf.is_empty()); debug_assert!(dst_buf.capacity() >= src.len()); let mut dst = unsafe { slice::from_raw_parts_mut(dst_buf.as_mut_ptr(), dst_buf.capacity()) }; - unescape_literal(src, mode, |range, res| match res { + unescape_literal_unchecked(src, mode, |range, res| match res { Ok(c) => { // NOTE: We can't use `char::encode_utf8` because `c` can be an invalid unicode code. let written = super::utf8::encode_utf8_raw(c, dst).len(); @@ -310,7 +316,7 @@ mod tests { assert_eq!(ok, expected_str, "{panic_str}"); let mut errs2 = Vec::with_capacity(errs.len()); - let out = parse_string_literal(src, mode, |range, e| { + let out = try_parse_string_literal(src, mode, |range, e| { errs2.push((range, e)); }); assert_eq!(errs2, errs, "{panic_str}"); diff --git a/crates/parse/src/parser/lit.rs b/crates/parse/src/parser/lit.rs index dc9a6b44..7b360e76 100644 --- a/crates/parse/src/parser/lit.rs +++ b/crates/parse/src/parser/lit.rs @@ -151,14 +151,15 @@ impl<'sess, 'ast> Parser<'sess, 'ast> { TokenLitKind::HexStr => unescape::Mode::HexStr, _ => unreachable!(), }; - let parse = |s: Symbol| unescape::parse_string_literal(s.as_str(), mode, |_, _| {}); - let mut value = parse(lit.symbol); + let mut value = unescape::parse_string_literal(lit.symbol.as_str(), mode); while let Some(TokenLit { symbol, kind }) = self.token.lit() { if kind != lit.kind { break; } - value.append(&mut parse(symbol)); + value + .to_mut() + .extend_from_slice(&unescape::parse_string_literal(symbol.as_str(), mode)); self.bump(); } diff --git a/crates/sema/src/parse.rs b/crates/sema/src/parse.rs index 88512f71..9550fc24 100644 --- a/crates/sema/src/parse.rs +++ b/crates/sema/src/parse.rs @@ -1,6 +1,7 @@ use crate::hir::SourceId; use rayon::prelude::*; use std::{ + borrow::Cow, path::{Path, PathBuf}, sync::Arc, }; @@ -14,7 +15,7 @@ use sulk_interface::{ source_map::{FileName, FileResolver, SourceFile}, Result, Session, }; -use sulk_parse::{Lexer, Parser}; +use sulk_parse::{unescape, Lexer, Parser}; use thread_local::ThreadLocal; pub struct ParsingContext<'sess> { @@ -221,9 +222,11 @@ macro_rules! resolve_imports { } }) .filter_map(move |(id, import, span)| { - // TODO: Unescape - let path_str = import.path.value.as_str(); - let path = Path::new(path_str); + let path_bytes = escape_for_import_path(import.path.value.as_str())?; + let Some(path) = path_from_bytes(&path_bytes[..]) else { + this.dcx().err("import path is not a valid UTF-8 string").span(span).emit(); + return None; + }; this.file_resolver .resolve_file(path, parent.as_deref()) .map_err(|e| this.dcx().err(e.to_string()).span(span).emit()) @@ -234,6 +237,27 @@ macro_rules! resolve_imports { } use resolve_imports; +fn escape_for_import_path(path_str: &str) -> Option> { + let mut any_error = false; + let path_str = + unescape::try_parse_string_literal(path_str, unescape::Mode::Str, |_, _| any_error = true); + if any_error { + return None; + } + Some(path_str) +} + +#[cfg(unix)] +fn path_from_bytes(bytes: &[u8]) -> Option<&Path> { + use std::os::unix::ffi::OsStrExt; + Some(Path::new(std::ffi::OsStr::from_bytes(bytes))) +} + +#[cfg(not(unix))] +fn path_from_bytes(bytes: &[u8]) -> Option<&Path> { + std::str::from_utf8(bytes).ok().map(Path::new) +} + /// Parsed sources, returned by [`ParsingContext::parse`]. #[derive(Default, Debug)] pub struct ParsedSources<'ast> { @@ -242,7 +266,8 @@ pub struct ParsedSources<'ast> { } impl<'ast> ParsedSources<'ast> { - fn new() -> Self { + /// Creates a new empty list of parsed sources. + pub fn new() -> Self { Self { sources: IndexVec::new() } } diff --git a/tests/ui/parser/escaped_import.sol b/tests/ui/parser/escaped_import.sol new file mode 100644 index 00000000..d4352bb6 --- /dev/null +++ b/tests/ui/parser/escaped_import.sol @@ -0,0 +1 @@ +import "\?"; //~ ERROR unknown character escape diff --git a/tests/ui/parser/escaped_import.stderr b/tests/ui/parser/escaped_import.stderr new file mode 100644 index 00000000..ad8d7566 --- /dev/null +++ b/tests/ui/parser/escaped_import.stderr @@ -0,0 +1,9 @@ +error: unknown character escape + --> $DIR/escaped_import.sol:1:10 + | +LL | import "\?"; + | ^ + | + +error: aborting due to 1 previous error +