Skip to content

Commit

Permalink
Test UTF-8-ness only in #[test]s
Browse files Browse the repository at this point in the history
  • Loading branch information
Kijewski committed Jun 20, 2024
1 parent 5da6866 commit bdc3c79
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 55 deletions.
70 changes: 33 additions & 37 deletions rinja_derive/src/generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2285,29 +2285,12 @@ struct WritePartsBuffers {
expr: Option<Buffer>,
}

// Identifiers to be replaced with raw identifiers, so as to avoid
// collisions between template syntax and Rust's syntax. In particular
// [Rust keywords](https://doc.rust-lang.org/reference/keywords.html)
// should be replaced, since they're not reserved words in Rinja
// syntax but have a high probability of causing problems in the
// generated code.
//
// This list excludes the Rust keywords *self*, *Self*, and *super*
// because they are not allowed to be raw identifiers, and *loop*
// because it's used as something like a keyword in the template
// language.
fn normalize_identifier(ident: &str) -> &str {
// This table works for as long as the replacement string is the original string
// prepended with "r#". The strings get right-padded to the same length with b'_'.
// While the code does not need it, please keep the list sorted when adding new
// keywords.

// FIXME: Replace with `[core::ascii::Char; MAX_REPL_LEN]` once
const MAX_KW_LEN: usize = 8;
const MAX_REPL_LEN: usize = MAX_KW_LEN + 2;
const KWS: &[&[[u8; MAX_REPL_LEN]]] = {
// FIXME: Replace `u8` with `[core::ascii::Char; MAX_REPL_LEN]` once
// <https://github.com/rust-lang/rust/issues/110998> is stable.

const MAX_KW_LEN: usize = 8;
const MAX_REPL_LEN: usize = MAX_KW_LEN + 2;

const KW0: &[[u8; MAX_REPL_LEN]] = &[];
const KW1: &[[u8; MAX_REPL_LEN]] = &[];
const KW2: &[[u8; MAX_REPL_LEN]] = &[
Expand Down Expand Up @@ -2365,24 +2348,37 @@ fn normalize_identifier(ident: &str) -> &str {
const KW7: &[[u8; MAX_REPL_LEN]] = &[*b"r#unsized_", *b"r#virtual_"];
const KW8: &[[u8; MAX_REPL_LEN]] = &[*b"r#abstract", *b"r#continue", *b"r#override"];

const KWS: &[&[[u8; MAX_REPL_LEN]]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8];

// Ensure that all strings are ASCII, because we use `from_utf8_unchecked()` further down.
const _: () = {
let mut i = 0;
while i < KWS.len() {
let mut j = 0;
while KWS[i].len() < j {
let mut k = 0;
while KWS[i][j].len() < k {
assert!(KWS[i][j][k].is_ascii());
k += 1;
}
j += 1;
&[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8]
};

/// Ensure that all strings are UTF-8, because we use `from_utf8_unchecked()` further down.
#[test]
fn ensure_utf8() {
for kws in KWS {
for kw in *kws {
if std::str::from_utf8(kw).is_err() {
panic!("not UTF-8: {:?}", kw);
}
i += 1;
}
};
}
}

/// Identifiers to be replaced with raw identifiers, so as to avoid
/// collisions between template syntax and Rust's syntax. In particular
/// [Rust keywords](https://doc.rust-lang.org/reference/keywords.html)
/// should be replaced, since they're not reserved words in Rinja
/// syntax but have a high probability of causing problems in the
/// generated code.
///
/// This list excludes the Rust keywords *self*, *Self*, and *super*
/// because they are not allowed to be raw identifiers, and *loop*
/// because it's used as something like a keyword in the template
/// language.
fn normalize_identifier(ident: &str) -> &str {
// This table works for as long as the replacement string is the original string
// prepended with "r#". The strings get right-padded to the same length with b'_'.
// While the code does not need it, please keep the list sorted when adding new
// keywords.

if ident.len() > MAX_KW_LEN {
return ident;
Expand Down
35 changes: 17 additions & 18 deletions rinja_parser/src/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1203,8 +1203,10 @@ impl<'a> Comment<'a> {
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Ws(pub Option<Whitespace>, pub Option<Whitespace>);

fn is_rust_keyword(ident: &str) -> bool {
const MAX_KW_LEN: usize = 8;
const MAX_KW_LEN: usize = 8;
const KWS: &[&[[u8; MAX_KW_LEN]]] = {
// FIXME: Replace `u8` with `[core::ascii::Char; MAX_KW_LEN]` once
// <https://github.com/rust-lang/rust/issues/110998> is stable.

const KW0: &[[u8; MAX_KW_LEN]] = &[];
const KW1: &[[u8; MAX_KW_LEN]] = &[];
Expand Down Expand Up @@ -1268,25 +1270,22 @@ fn is_rust_keyword(ident: &str) -> bool {
const KW7: &[[u8; MAX_KW_LEN]] = &[*b"unsized_", *b"virtual_"];
const KW8: &[[u8; MAX_KW_LEN]] = &[*b"abstract", *b"continue", *b"override"];

const KWS: &[&[[u8; MAX_KW_LEN]]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8];

// Ensure that all strings are ASCII, because we use `from_utf8_unchecked()` further down.
const _: () = {
let mut i = 0;
while i < KWS.len() {
let mut j = 0;
while KWS[i].len() < j {
let mut k = 0;
while KWS[i][j].len() < k {
assert!(KWS[i][j][k].is_ascii());
k += 1;
}
j += 1;
&[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8]
};

/// Ensure that all strings are UTF-8, because we use `from_utf8_unchecked()` further down.
#[test]
fn ensure_utf8() {
for kws in KWS {
for kw in *kws {
if std::str::from_utf8(kw).is_err() {
panic!("not UTF-8: {:?}", kw);
}
i += 1;
}
};
}
}

fn is_rust_keyword(ident: &str) -> bool {
if ident.len() > MAX_KW_LEN {
return false;
}
Expand Down

0 comments on commit bdc3c79

Please sign in to comment.