-
Notifications
You must be signed in to change notification settings - Fork 157
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
698: Implement Byte Strings r=philberty a=philberty Byte strings are not `str`s; they are arrays of [u8; capacity], and this preserves their type guarantees as a byte string. This patch merges work from Mark to implement the correct typing; the missing piece was that each implicit type needed its own implicit id, otherwise there is a loop in looking up the covariant types. Fixes #697 Co-authored-by: Mark Wielaard <mark@klomp.org> 701: Fix lexer to not produce bad unicode escape values r=philberty a=CohenArthur There were a couple of issues in the lexer unicode escape code. Unicode escape sequences must always start with an opening curly bracket (and end with a closing one). Underscores are not allowed as the starting character. And the produced values must be unicode scalar values, which excludes surrogate values (D800 to DFFF) or values larger than 10FFFF. Also try to recover more gracefully from errors by trying to skip past any bad characters to the end of the escape sequence. Test all of the above in a new testcase unicode_escape.rs. Patch: https://git.sr.ht/~mjw/gccrs/commit/unicode_escape Mail: https://gcc.gnu.org/pipermail/gcc-rust/2021-October/000231.html Co-authored-by: Philip Herron <philip.herron@embecosm.com> Co-authored-by: Mark Wielaard <mark@klomp.org>
- Loading branch information
Showing
5 changed files
with
216 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
// Test case: a byte string literal assigned to a `&[u8; 4]` binding. | ||
pub fn main() { | ||
// `a` is declared with an explicit reference-to-array type and no initializer. | ||
let a: &[u8; 4]; | ||
// `b"test"` is a four-byte byte string literal, matching the `[u8; 4]` annotation above. | ||
a = b"test"; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
fn main () | ||
{ | ||
// Braces are required | ||
let _cbl = '\u013'; // { dg-error "unicode escape" } | ||
let _sbl = "\u013"; //{ dg-error "unicode escape" } | ||
|
||
// One to six hex digits | ||
let _c0 = '\u{}'; // { dg-error "unicode escape" } | ||
let _c1 = '\u{0}'; | ||
let _c2 = '\u{00}'; | ||
let _c3 = '\u{000}'; | ||
let _c4 = '\u{0000}'; | ||
let _c5 = '\u{00000}'; | ||
let _c6 = '\u{000000}'; | ||
let _c7 = '\u{0000000}'; // { dg-error "unicode escape" } | ||
|
||
let _s0 = "\u{}"; // { dg-error "unicode escape" } | ||
let _s1 = "\u{0}"; | ||
let _s2 = "\u{00}"; | ||
let _s3 = "\u{000}"; | ||
let _s4 = "\u{0000}"; | ||
let _s5 = "\u{00000}"; | ||
let _s6 = "\u{000000}"; | ||
let _s7 = "\u{0000000}"; // { dg-error "unicode escape" } | ||
|
||
// Underscores OK except for start | ||
let _c_ = '\u{00___01__0_1_}'; | ||
let _s_ = "\u{00___01__0_1_}"; | ||
let _c__ = '\u{_00__01__0_}'; // { dg-error "unicode escape" } | ||
let _s__ = "\u{_00__01__0_}"; // { dg-error "unicode escape" } | ||
|
||
// Must be hex chars | ||
let _chex = '\u{hex}'; // { dg-error "unicode escape" } | ||
let _shex = '\u{hex}'; // { dg-error "unicode escape" } | ||
|
||
// Only valid from 0x0 to 0xD7FF and from 0xE000 to 0x10FFF | ||
let _cd7ff = '\u{D7FF}'; | ||
let _sd7ff = "\u{D7FF}"; | ||
let _cd800 = '\u{D800}'; // { dg-error "unicode escape" } | ||
let _sd800 = "\u{D800}"; // { dg-error "unicode escape" } | ||
|
||
let _cdfff = '\u{DFFF}'; // { dg-error "unicode escape" } | ||
let _sdfff = "\u{DFFF}"; // { dg-error "unicode escape" } | ||
let _ce000 = '\u{E000}'; | ||
let _se000 = "\u{E000}"; | ||
|
||
let _clast = '\u{10FFFF}'; | ||
let _slast = "\u{10FFFF}"; | ||
let _clast1 = '\u{110000}'; // { dg-error "unicode escape" } | ||
let _slast1 = "\u{110000}"; // { dg-error "unicode escape" } | ||
|
||
let _cffffff = '\u{FFFFFF}'; // { dg-error "unicode escape" } | ||
let _sffffff = "\u{FFFFFF}"; // { dg-error "unicode escape" } | ||
|
||
// unicode escapes cannot be used in bytes or byte strings. | ||
// Except in raw byte strings (where they aren't escapes). | ||
let _bc = b'\u{000A}'; // { dg-error "unicode escape" } | ||
let _bs = b"\u{000A}"; // { dg-error "unicode escape" } | ||
let _rbs = br"\u{000A}"; | ||
} |