Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: escape import paths #45

Merged
merged 1 commit into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions crates/parse/src/lexer/unescape/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,14 @@ pub enum Mode {
}

/// Parses a string literal (without quotes) into a byte array.
#[instrument(level = "debug", skip_all)]
pub fn parse_string_literal<F>(src: &str, mode: Mode, f: F) -> Vec<u8>
/// Decodes the contents of a string literal (quotes already stripped) into bytes.
///
/// Convenience wrapper around [`try_parse_string_literal`] for callers that do not
/// need to observe escape errors: every reported error is dropped.
pub fn parse_string_literal(src: &str, mode: Mode) -> Cow<'_, [u8]> {
    // No-op error sink: escape errors are intentionally ignored by this entry point.
    let ignore_errors = |_, _| {};
    try_parse_string_literal(src, mode, ignore_errors)
}

/// Parses a string literal (without quotes) into a byte array.
/// `f` is called for each escape error.
#[instrument(name = "parse_string_literal", level = "debug", skip_all)]
pub fn try_parse_string_literal<F>(src: &str, mode: Mode, f: F) -> Cow<'_, [u8]>
where
F: FnMut(Range<usize>, EscapeError),
{
Expand All @@ -36,7 +42,7 @@ where
bytes = Cow::Owned(decoded);
}
}
bytes.into_owned()
bytes
}

#[cold]
Expand All @@ -58,7 +64,7 @@ where
debug_assert!(dst_buf.is_empty());
debug_assert!(dst_buf.capacity() >= src.len());
let mut dst = unsafe { slice::from_raw_parts_mut(dst_buf.as_mut_ptr(), dst_buf.capacity()) };
unescape_literal(src, mode, |range, res| match res {
unescape_literal_unchecked(src, mode, |range, res| match res {
Ok(c) => {
// NOTE: We can't use `char::encode_utf8` because `c` can be an invalid unicode code.
let written = super::utf8::encode_utf8_raw(c, dst).len();
Expand Down Expand Up @@ -310,7 +316,7 @@ mod tests {
assert_eq!(ok, expected_str, "{panic_str}");

let mut errs2 = Vec::with_capacity(errs.len());
let out = parse_string_literal(src, mode, |range, e| {
let out = try_parse_string_literal(src, mode, |range, e| {
errs2.push((range, e));
});
assert_eq!(errs2, errs, "{panic_str}");
Expand Down
7 changes: 4 additions & 3 deletions crates/parse/src/parser/lit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,14 +151,15 @@ impl<'sess, 'ast> Parser<'sess, 'ast> {
TokenLitKind::HexStr => unescape::Mode::HexStr,
_ => unreachable!(),
};
let parse = |s: Symbol| unescape::parse_string_literal(s.as_str(), mode, |_, _| {});

let mut value = parse(lit.symbol);
let mut value = unescape::parse_string_literal(lit.symbol.as_str(), mode);
while let Some(TokenLit { symbol, kind }) = self.token.lit() {
if kind != lit.kind {
break;
}
value.append(&mut parse(symbol));
value
.to_mut()
.extend_from_slice(&unescape::parse_string_literal(symbol.as_str(), mode));
self.bump();
}

Expand Down
35 changes: 30 additions & 5 deletions crates/sema/src/parse.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::hir::SourceId;
use rayon::prelude::*;
use std::{
borrow::Cow,
path::{Path, PathBuf},
sync::Arc,
};
Expand All @@ -14,7 +15,7 @@ use sulk_interface::{
source_map::{FileName, FileResolver, SourceFile},
Result, Session,
};
use sulk_parse::{Lexer, Parser};
use sulk_parse::{unescape, Lexer, Parser};
use thread_local::ThreadLocal;

pub struct ParsingContext<'sess> {
Expand Down Expand Up @@ -221,9 +222,11 @@ macro_rules! resolve_imports {
}
})
.filter_map(move |(id, import, span)| {
// TODO: Unescape
let path_str = import.path.value.as_str();
let path = Path::new(path_str);
let path_bytes = escape_for_import_path(import.path.value.as_str())?;
let Some(path) = path_from_bytes(&path_bytes[..]) else {
this.dcx().err("import path is not a valid UTF-8 string").span(span).emit();
return None;
};
this.file_resolver
.resolve_file(path, parent.as_deref())
.map_err(|e| this.dcx().err(e.to_string()).span(span).emit())
Expand All @@ -234,6 +237,27 @@ macro_rules! resolve_imports {
}
use resolve_imports;

/// Decodes the escape sequences of an import path literal into raw bytes.
///
/// Despite the name, this *un*escapes: `path_str` is run through
/// [`unescape::try_parse_string_literal`] in [`unescape::Mode::Str`].
///
/// Returns `None` if the unescaper reported at least one escape error, otherwise
/// `Some` with the decoded bytes. No diagnostic is emitted from here.
// NOTE(review): presumably the lexer has already reported these escape errors
// (see the `escaped_import` UI test) — confirm before adding a diagnostic.
fn escape_for_import_path(path_str: &str) -> Option<Cow<'_, [u8]>> {
    let mut saw_escape_error = false;
    let unescaped = unescape::try_parse_string_literal(path_str, unescape::Mode::Str, |_, _| {
        saw_escape_error = true;
    });
    (!saw_escape_error).then_some(unescaped)
}

/// Views `bytes` as a filesystem path without copying.
///
/// Unix variant: the bytes are wrapped as an `OsStr` via `OsStrExt::from_bytes`
/// with no validation, so this always returns `Some`.
#[cfg(unix)]
fn path_from_bytes(bytes: &[u8]) -> Option<&Path> {
    use std::{ffi::OsStr, os::unix::ffi::OsStrExt};

    let raw = OsStr::from_bytes(bytes);
    Some(Path::new(raw))
}

/// Views `bytes` as a filesystem path without copying.
///
/// Non-Unix variant: the bytes must be valid UTF-8; returns `None` otherwise.
#[cfg(not(unix))]
fn path_from_bytes(bytes: &[u8]) -> Option<&Path> {
    match std::str::from_utf8(bytes) {
        Ok(text) => Some(Path::new(text)),
        Err(_) => None,
    }
}

/// Parsed sources, returned by [`ParsingContext::parse`].
#[derive(Default, Debug)]
pub struct ParsedSources<'ast> {
Expand All @@ -242,7 +266,8 @@ pub struct ParsedSources<'ast> {
}

impl<'ast> ParsedSources<'ast> {
fn new() -> Self {
/// Creates a new empty list of parsed sources.
pub fn new() -> Self {
Self { sources: IndexVec::new() }
}

Expand Down
1 change: 1 addition & 0 deletions tests/ui/parser/escaped_import.sol
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import "\?"; //~ ERROR unknown character escape
9 changes: 9 additions & 0 deletions tests/ui/parser/escaped_import.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
error: unknown character escape
--> $DIR/escaped_import.sol:1:10
|
LL | import "\?";
| ^
|

error: aborting due to 1 previous error