From bdc3c79e95ad52f8b79c50ceeecfecb25407e9b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Kijewski?= Date: Thu, 20 Jun 2024 22:02:39 +0200 Subject: [PATCH] Test UTF-8-ness only in #[test]s --- rinja_derive/src/generator.rs | 70 +++++++++++++++++------------------ rinja_parser/src/node.rs | 35 +++++++++--------- 2 files changed, 50 insertions(+), 55 deletions(-) diff --git a/rinja_derive/src/generator.rs b/rinja_derive/src/generator.rs index 1815ad608..264c60227 100644 --- a/rinja_derive/src/generator.rs +++ b/rinja_derive/src/generator.rs @@ -2285,29 +2285,12 @@ struct WritePartsBuffers { expr: Option, } -// Identifiers to be replaced with raw identifiers, so as to avoid -// collisions between template syntax and Rust's syntax. In particular -// [Rust keywords](https://doc.rust-lang.org/reference/keywords.html) -// should be replaced, since they're not reserved words in Rinja -// syntax but have a high probability of causing problems in the -// generated code. -// -// This list excludes the Rust keywords *self*, *Self*, and *super* -// because they are not allowed to be raw identifiers, and *loop* -// because it's used something like a keyword in the template -// language. -fn normalize_identifier(ident: &str) -> &str { - // This table works for as long as the replacement string is the original string - // prepended with "r#". The strings get right-padded to the same length with b'_'. - // While the code does not need it, please keep the list sorted when adding new - // keywords. - - // FIXME: Replace with `[core:ascii::Char; MAX_REPL_LEN]` once +const MAX_KW_LEN: usize = 8; +const MAX_REPL_LEN: usize = MAX_KW_LEN + 2; +const KWS: &[&[[u8; MAX_REPL_LEN]]] = { + // FIXME: Replace `u8` with `[core:ascii::Char; MAX_REPL_LEN]` once // is stable. - const MAX_KW_LEN: usize = 8; - const MAX_REPL_LEN: usize = MAX_KW_LEN + 2; - const KW0: &[[u8; MAX_REPL_LEN]] = &[]; const KW1: &[[u8; MAX_REPL_LEN]] = &[]; const KW2: &[[u8; MAX_REPL_LEN]] = &[ @@ -2365,24 +2348,37 @@ fn normalize_identifier(ident: &str) -> &str { const KW7: &[[u8; MAX_REPL_LEN]] = &[*b"r#unsized_", *b"r#virtual_"]; const KW8: &[[u8; MAX_REPL_LEN]] = &[*b"r#abstract", *b"r#continue", *b"r#override"]; - const KWS: &[&[[u8; MAX_REPL_LEN]]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8]; - - // Ensure that all strings are ASCII, because we use `from_utf8_unchecked()` further down. - const _: () = { - let mut i = 0; - while i < KWS.len() { - let mut j = 0; - while KWS[i].len() < j { - let mut k = 0; - while KWS[i][j].len() < k { - assert!(KWS[i][j][k].is_ascii()); - k += 1; - } - j += 1; + &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8] +}; + +/// Ensure that all strings are UTF-8, because we use `from_utf8_unchecked()` further down. +#[test] +fn ensure_utf8() { + for kws in KWS { + for kw in *kws { + if std::str::from_utf8(kw).is_err() { + panic!("not UTF-8: {:?}", kw); } - i += 1; } - }; + } +} + +/// Identifiers to be replaced with raw identifiers, so as to avoid +/// collisions between template syntax and Rust's syntax. In particular +/// [Rust keywords](https://doc.rust-lang.org/reference/keywords.html) +/// should be replaced, since they're not reserved words in Rinja +/// syntax but have a high probability of causing problems in the +/// generated code. +/// +/// This list excludes the Rust keywords *self*, *Self*, and *super* +/// because they are not allowed to be raw identifiers, and *loop* +/// because it's used something like a keyword in the template +/// language. +fn normalize_identifier(ident: &str) -> &str { + // This table works for as long as the replacement string is the original string + // prepended with "r#". The strings get right-padded to the same length with b'_'. + // While the code does not need it, please keep the list sorted when adding new + // keywords. if ident.len() > MAX_KW_LEN { return ident; diff --git a/rinja_parser/src/node.rs b/rinja_parser/src/node.rs index 0ccd8dd05..6bbd495e4 100644 --- a/rinja_parser/src/node.rs +++ b/rinja_parser/src/node.rs @@ -1203,8 +1203,10 @@ impl<'a> Comment<'a> { #[derive(Clone, Copy, Debug, PartialEq)] pub struct Ws(pub Option, pub Option); -fn is_rust_keyword(ident: &str) -> bool { - const MAX_KW_LEN: usize = 8; +const MAX_KW_LEN: usize = 8; +const KWS: &[&[[u8; MAX_KW_LEN]]] = { + // FIXME: Replace `u8` with `[core:ascii::Char; MAX_REPL_LEN]` once + // is stable. const KW0: &[[u8; MAX_KW_LEN]] = &[]; const KW1: &[[u8; MAX_KW_LEN]] = &[]; @@ -1268,25 +1270,22 @@ fn is_rust_keyword(ident: &str) -> bool { const KW7: &[[u8; MAX_KW_LEN]] = &[*b"unsized_", *b"virtual_"]; const KW8: &[[u8; MAX_KW_LEN]] = &[*b"abstract", *b"continue", *b"override"]; - const KWS: &[&[[u8; MAX_KW_LEN]]] = &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8]; - - // Ensure that all strings are ASCII, because we use `from_utf8_unchecked()` further down. - const _: () = { - let mut i = 0; - while i < KWS.len() { - let mut j = 0; - while KWS[i].len() < j { - let mut k = 0; - while KWS[i][j].len() < k { - assert!(KWS[i][j][k].is_ascii()); - k += 1; - } - j += 1; + &[KW0, KW1, KW2, KW3, KW4, KW5, KW6, KW7, KW8] +}; + +/// Ensure that all strings are UTF-8, because we use `from_utf8_unchecked()` further down. +#[test] +fn ensure_utf8() { + for kws in KWS { + for kw in *kws { + if std::str::from_utf8(kw).is_err() { + panic!("not UTF-8: {:?}", kw); } - i += 1; } - }; + } +} +fn is_rust_keyword(ident: &str) -> bool { if ident.len() > MAX_KW_LEN { return false; }