Skip to content

Commit

Permalink
feat: escape import paths
Browse files Browse the repository at this point in the history
  • Loading branch information
DaniPopes committed Aug 2, 2024
1 parent fa7a3ca commit e542fe5
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 13 deletions.
16 changes: 11 additions & 5 deletions crates/parse/src/lexer/unescape/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,14 @@ pub enum Mode {
}

/// Parses a string literal (without quotes) into a byte array.
#[instrument(level = "debug", skip_all)]
pub fn parse_string_literal<F>(src: &str, mode: Mode, f: F) -> Vec<u8>
pub fn parse_string_literal(src: &str, mode: Mode) -> Cow<'_, [u8]> {
    // Convenience entry point: every escape error handed to the callback is
    // discarded. Use `try_parse_string_literal` to observe them.
    let ignore_errors = |_, _| {};
    try_parse_string_literal(src, mode, ignore_errors)
}

/// Parses a string literal (without quotes) into a byte array.
/// `f` is called for each escape error.
#[instrument(name = "parse_string_literal", level = "debug", skip_all)]
pub fn try_parse_string_literal<F>(src: &str, mode: Mode, f: F) -> Cow<'_, [u8]>
where
F: FnMut(Range<usize>, EscapeError),
{
Expand All @@ -36,7 +42,7 @@ where
bytes = Cow::Owned(decoded);
}
}
bytes.into_owned()
bytes
}

#[cold]
Expand All @@ -58,7 +64,7 @@ where
debug_assert!(dst_buf.is_empty());
debug_assert!(dst_buf.capacity() >= src.len());
let mut dst = unsafe { slice::from_raw_parts_mut(dst_buf.as_mut_ptr(), dst_buf.capacity()) };
unescape_literal(src, mode, |range, res| match res {
unescape_literal_unchecked(src, mode, |range, res| match res {
Ok(c) => {
// NOTE: We can't use `char::encode_utf8` because `c` can be an invalid unicode code.
let written = super::utf8::encode_utf8_raw(c, dst).len();
Expand Down Expand Up @@ -310,7 +316,7 @@ mod tests {
assert_eq!(ok, expected_str, "{panic_str}");

let mut errs2 = Vec::with_capacity(errs.len());
let out = parse_string_literal(src, mode, |range, e| {
let out = try_parse_string_literal(src, mode, |range, e| {
errs2.push((range, e));
});
assert_eq!(errs2, errs, "{panic_str}");
Expand Down
7 changes: 4 additions & 3 deletions crates/parse/src/parser/lit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,14 +151,15 @@ impl<'sess, 'ast> Parser<'sess, 'ast> {
TokenLitKind::HexStr => unescape::Mode::HexStr,
_ => unreachable!(),
};
let parse = |s: Symbol| unescape::parse_string_literal(s.as_str(), mode, |_, _| {});

let mut value = parse(lit.symbol);
let mut value = unescape::parse_string_literal(lit.symbol.as_str(), mode);
while let Some(TokenLit { symbol, kind }) = self.token.lit() {
if kind != lit.kind {
break;
}
value.append(&mut parse(symbol));
value
.to_mut()
.extend_from_slice(&unescape::parse_string_literal(symbol.as_str(), mode));
self.bump();
}

Expand Down
35 changes: 30 additions & 5 deletions crates/sema/src/parse.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::hir::SourceId;
use rayon::prelude::*;
use std::{
borrow::Cow,
path::{Path, PathBuf},
sync::Arc,
};
Expand All @@ -14,7 +15,7 @@ use sulk_interface::{
source_map::{FileName, FileResolver, SourceFile},
Result, Session,
};
use sulk_parse::{Lexer, Parser};
use sulk_parse::{unescape, Lexer, Parser};
use thread_local::ThreadLocal;

pub struct ParsingContext<'sess> {
Expand Down Expand Up @@ -221,9 +222,11 @@ macro_rules! resolve_imports {
}
})
.filter_map(move |(id, import, span)| {
// TODO: Unescape
let path_str = import.path.value.as_str();
let path = Path::new(path_str);
let path_bytes = escape_for_import_path(import.path.value.as_str())?;
let Some(path) = path_from_bytes(&path_bytes[..]) else {
this.dcx().err("import path is not a valid UTF-8 string").span(span).emit();
return None;
};
this.file_resolver
.resolve_file(path, parent.as_deref())
.map_err(|e| this.dcx().err(e.to_string()).span(span).emit())
Expand All @@ -234,6 +237,27 @@ macro_rules! resolve_imports {
}
use resolve_imports;

/// Decodes the escape sequences of an import path string.
///
/// Note that, despite its name, this function *unescapes* `path_str`: it runs
/// it through [`unescape::try_parse_string_literal`] in [`unescape::Mode::Str`]
/// and yields the resulting bytes.
///
/// Returns `None` if the unescaper reported at least one escape error; the
/// individual errors are not inspected here, only their presence is recorded.
fn escape_for_import_path(path_str: &str) -> Option<Cow<'_, [u8]>> {
    let mut saw_error = false;
    let unescaped =
        unescape::try_parse_string_literal(path_str, unescape::Mode::Str, |_, _| {
            saw_error = true;
        });
    (!saw_error).then_some(unescaped)
}

/// Views a byte slice as a [`Path`] without copying.
///
/// On Unix an `OsStr` can be built from any byte sequence, so this variant
/// always returns `Some`.
#[cfg(unix)]
fn path_from_bytes(bytes: &[u8]) -> Option<&Path> {
    use std::{ffi::OsStr, os::unix::ffi::OsStrExt};

    let os_str = OsStr::from_bytes(bytes);
    Some(Path::new(os_str))
}

/// Views a byte slice as a [`Path`] without copying.
///
/// On non-Unix targets the bytes must be valid UTF-8; otherwise `None` is
/// returned.
#[cfg(not(unix))]
fn path_from_bytes(bytes: &[u8]) -> Option<&Path> {
    match std::str::from_utf8(bytes) {
        Ok(text) => Some(Path::new(text)),
        Err(_) => None,
    }
}

/// Parsed sources, returned by [`ParsingContext::parse`].
#[derive(Default, Debug)]
pub struct ParsedSources<'ast> {
Expand All @@ -242,7 +266,8 @@ pub struct ParsedSources<'ast> {
}

impl<'ast> ParsedSources<'ast> {
fn new() -> Self {
/// Creates a new empty list of parsed sources.
///
/// The `sources` collection starts out as a freshly created, empty
/// `IndexVec`; nothing is parsed or loaded by this call.
pub fn new() -> Self {
Self { sources: IndexVec::new() }
}

Expand Down
1 change: 1 addition & 0 deletions tests/ui/parser/escaped_import.sol
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import "\?"; //~ ERROR unknown character escape
9 changes: 9 additions & 0 deletions tests/ui/parser/escaped_import.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
error: unknown character escape
--> $DIR/escaped_import.sol:1:10
|
LL | import "\?";
| ^
|

error: aborting due to 1 previous error

0 comments on commit e542fe5

Please sign in to comment.