Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

std.zig.tokenizer: simplification and spec conformance #20885

Merged
merged 3 commits into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions lib/std/crypto/ml_kem.zig
Original file line number Diff line number Diff line change
Expand Up @@ -677,10 +677,10 @@ fn montReduce(x: i32) i16 {
// Note gcd(2¹⁶, q) = 1 as q is prime. Write q' := 62209 = q⁻¹ mod R.
// First we compute
//
// m := ((x mod R) q') mod R
// m := ((x mod R) q') mod R
// = x q' mod R
// = int16(x q')
// = int16(int32(x) * int32(q'))
// = int16(x q')
// = int16(int32(x) * int32(q'))
//
// Note that x q' might be as big as 2³² and could overflow the int32
// multiplication in the last line. However for any int32s a and b,
Expand Down
9 changes: 4 additions & 5 deletions lib/std/macho.zig
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,7 @@ pub const symtab_command = extern struct {
/// local symbols (static and debugging symbols) - grouped by module
/// defined external symbols - grouped by module (sorted by name if not lib)
/// undefined external symbols (sorted by name if MH_BINDATLOAD is not set,
/// and in order the were seen by the static
/// linker if MH_BINDATLOAD is set)
/// and in order the were seen by the static linker if MH_BINDATLOAD is set)
/// In this load command there are offsets and counts to each of the three groups
/// of symbols.
///
Expand All @@ -219,9 +218,9 @@ pub const symtab_command = extern struct {
/// shared library. For executable and object modules, which are files
/// containing only one module, the information that would be in these three
/// tables is determined as follows:
/// table of contents - the defined external symbols are sorted by name
/// module table - the file contains only one module so everything in the
/// file is part of the module.
/// table of contents - the defined external symbols are sorted by name
/// module table - the file contains only one module so everything in the file
/// is part of the module.
/// reference symbol table - is the defined and undefined external symbols
///
/// For dynamically linked shared library files this load command also contains
Expand Down
33 changes: 14 additions & 19 deletions lib/std/unicode.zig
Original file line number Diff line number Diff line change
Expand Up @@ -95,16 +95,13 @@ pub inline fn utf8EncodeComptime(comptime c: u21) [

const Utf8DecodeError = Utf8Decode2Error || Utf8Decode3Error || Utf8Decode4Error;

/// Decodes the UTF-8 codepoint encoded in the given slice of bytes.
/// bytes.len must be equal to utf8ByteSequenceLength(bytes[0]) catch unreachable.
/// If you already know the length at comptime, you can call one of
/// utf8Decode2,utf8Decode3,utf8Decode4 directly instead of this function.
/// Deprecated. This function has an awkward API that is too easy to use incorrectly.
pub fn utf8Decode(bytes: []const u8) Utf8DecodeError!u21 {
return switch (bytes.len) {
1 => @as(u21, bytes[0]),
2 => utf8Decode2(bytes),
3 => utf8Decode3(bytes),
4 => utf8Decode4(bytes),
1 => bytes[0],
2 => utf8Decode2(bytes[0..2].*),
3 => utf8Decode3(bytes[0..3].*),
4 => utf8Decode4(bytes[0..4].*),
else => unreachable,
};
}
Expand All @@ -113,8 +110,7 @@ const Utf8Decode2Error = error{
Utf8ExpectedContinuation,
Utf8OverlongEncoding,
};
pub fn utf8Decode2(bytes: []const u8) Utf8Decode2Error!u21 {
assert(bytes.len == 2);
pub fn utf8Decode2(bytes: [2]u8) Utf8Decode2Error!u21 {
assert(bytes[0] & 0b11100000 == 0b11000000);
var value: u21 = bytes[0] & 0b00011111;

Expand All @@ -130,7 +126,7 @@ pub fn utf8Decode2(bytes: []const u8) Utf8Decode2Error!u21 {
const Utf8Decode3Error = Utf8Decode3AllowSurrogateHalfError || error{
Utf8EncodesSurrogateHalf,
};
pub fn utf8Decode3(bytes: []const u8) Utf8Decode3Error!u21 {
pub fn utf8Decode3(bytes: [3]u8) Utf8Decode3Error!u21 {
const value = try utf8Decode3AllowSurrogateHalf(bytes);

if (0xd800 <= value and value <= 0xdfff) return error.Utf8EncodesSurrogateHalf;
Expand All @@ -142,8 +138,7 @@ const Utf8Decode3AllowSurrogateHalfError = error{
Utf8ExpectedContinuation,
Utf8OverlongEncoding,
};
pub fn utf8Decode3AllowSurrogateHalf(bytes: []const u8) Utf8Decode3AllowSurrogateHalfError!u21 {
assert(bytes.len == 3);
pub fn utf8Decode3AllowSurrogateHalf(bytes: [3]u8) Utf8Decode3AllowSurrogateHalfError!u21 {
assert(bytes[0] & 0b11110000 == 0b11100000);
var value: u21 = bytes[0] & 0b00001111;

Expand All @@ -165,8 +160,7 @@ const Utf8Decode4Error = error{
Utf8OverlongEncoding,
Utf8CodepointTooLarge,
};
pub fn utf8Decode4(bytes: []const u8) Utf8Decode4Error!u21 {
assert(bytes.len == 4);
pub fn utf8Decode4(bytes: [4]u8) Utf8Decode4Error!u21 {
assert(bytes[0] & 0b11111000 == 0b11110000);
var value: u21 = bytes[0] & 0b00000111;

Expand Down Expand Up @@ -1637,12 +1631,13 @@ pub fn wtf8Encode(c: u21, out: []u8) error{CodepointTooLarge}!u3 {

const Wtf8DecodeError = Utf8Decode2Error || Utf8Decode3AllowSurrogateHalfError || Utf8Decode4Error;

/// Deprecated. This function has an awkward API that is too easy to use incorrectly.
pub fn wtf8Decode(bytes: []const u8) Wtf8DecodeError!u21 {
return switch (bytes.len) {
1 => @as(u21, bytes[0]),
2 => utf8Decode2(bytes),
3 => utf8Decode3AllowSurrogateHalf(bytes),
4 => utf8Decode4(bytes),
1 => bytes[0],
2 => utf8Decode2(bytes[0..2].*),
3 => utf8Decode3AllowSurrogateHalf(bytes[0..3].*),
4 => utf8Decode4(bytes[0..4].*),
else => unreachable,
};
}
Expand Down
2 changes: 1 addition & 1 deletion lib/std/zig/Ast.zig
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ pub fn parse(gpa: Allocator, source: [:0]const u8, mode: Mode) Allocator.Error!A
const token = tokenizer.next();
try tokens.append(gpa, .{
.tag = token.tag,
.start = @as(u32, @intCast(token.loc.start)),
.start = @intCast(token.loc.start),
});
if (token.tag == .eof) break;
}
Expand Down
15 changes: 3 additions & 12 deletions lib/std/zig/AstGen.zig
Original file line number Diff line number Diff line change
Expand Up @@ -11351,6 +11351,9 @@ fn failWithStrLitError(astgen: *AstGen, err: std.zig.string_literal.Error, token
.{raw_string[bad_index]},
);
},
.empty_char_literal => {
return astgen.failOff(token, offset, "empty character literal", .{});
},
}
}

Expand Down Expand Up @@ -13820,21 +13823,9 @@ fn lowerAstErrors(astgen: *AstGen) !void {
var msg: std.ArrayListUnmanaged(u8) = .{};
defer msg.deinit(gpa);

const token_starts = tree.tokens.items(.start);
const token_tags = tree.tokens.items(.tag);

var notes: std.ArrayListUnmanaged(u32) = .{};
defer notes.deinit(gpa);

const tok = parse_err.token + @intFromBool(parse_err.token_is_prev);
if (token_tags[tok] == .invalid) {
const bad_off: u32 = @intCast(tree.tokenSlice(tok).len);
const byte_abs = token_starts[tok] + bad_off;
try notes.append(gpa, try astgen.errNoteTokOff(tok, bad_off, "invalid byte: '{'}'", .{
std.zig.fmtEscapes(tree.source[byte_abs..][0..1]),
}));
}

for (tree.errors[1..]) |note| {
if (!note.is_note) break;

Expand Down
1 change: 0 additions & 1 deletion lib/std/zig/parser_test.zig
Original file line number Diff line number Diff line change
Expand Up @@ -6061,7 +6061,6 @@ test "recovery: invalid container members" {
, &[_]Error{
.expected_expr,
.expected_comma_after_field,
.expected_type_expr,
.expected_semi_after_stmt,
});
}
Expand Down
24 changes: 19 additions & 5 deletions lib/std/zig/string_literal.zig
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
const std = @import("../std.zig");
const assert = std.debug.assert;
const utf8Decode = std.unicode.utf8Decode;
const utf8Encode = std.unicode.utf8Encode;

pub const ParseError = error{
Expand Down Expand Up @@ -37,12 +36,16 @@ pub const Error = union(enum) {
expected_single_quote: usize,
/// The character at this index cannot be represented without an escape sequence.
invalid_character: usize,
/// `''`. Not returned for string literals.
empty_char_literal,
};

/// Only validates escape sequence characters.
/// Slice must be valid utf8 starting and ending with "'" and exactly one codepoint in between.
/// Asserts the slice starts and ends with single-quotes.
/// Returns an error if there is not exactly one UTF-8 codepoint in between.
pub fn parseCharLiteral(slice: []const u8) ParsedCharLiteral {
assert(slice.len >= 3 and slice[0] == '\'' and slice[slice.len - 1] == '\'');
if (slice.len < 3) return .{ .failure = .empty_char_literal };
assert(slice[0] == '\'');
assert(slice[slice.len - 1] == '\'');

switch (slice[1]) {
'\\' => {
Expand All @@ -55,7 +58,18 @@ pub fn parseCharLiteral(slice: []const u8) ParsedCharLiteral {
},
0 => return .{ .failure = .{ .invalid_character = 1 } },
else => {
const codepoint = utf8Decode(slice[1 .. slice.len - 1]) catch unreachable;
const inner = slice[1 .. slice.len - 1];
const n = std.unicode.utf8ByteSequenceLength(inner[0]) catch return .{
.failure = .{ .invalid_unicode_codepoint = 1 },
};
if (inner.len > n) return .{ .failure = .{ .expected_single_quote = 1 + n } };
const codepoint = switch (n) {
1 => inner[0],
2 => std.unicode.utf8Decode2(inner[0..2].*),
3 => std.unicode.utf8Decode3(inner[0..3].*),
4 => std.unicode.utf8Decode4(inner[0..4].*),
else => unreachable,
} catch return .{ .failure = .{ .invalid_unicode_codepoint = 1 } };
return .{ .success = codepoint };
},
}
Expand Down
92 changes: 46 additions & 46 deletions lib/std/zig/system/darwin/macos.zig
Original file line number Diff line number Diff line change
Expand Up @@ -303,16 +303,16 @@ test "detect" {
\\<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
\\<plist version="1.0">
\\<dict>
\\ <key>ProductBuildVersion</key>
\\ <string>7W98</string>
\\ <key>ProductCopyright</key>
\\ <string>Apple Computer, Inc. 1983-2004</string>
\\ <key>ProductName</key>
\\ <string>Mac OS X</string>
\\ <key>ProductUserVisibleVersion</key>
\\ <string>10.3.9</string>
\\ <key>ProductVersion</key>
\\ <string>10.3.9</string>
\\ <key>ProductBuildVersion</key>
\\ <string>7W98</string>
\\ <key>ProductCopyright</key>
\\ <string>Apple Computer, Inc. 1983-2004</string>
\\ <key>ProductName</key>
\\ <string>Mac OS X</string>
\\ <key>ProductUserVisibleVersion</key>
\\ <string>10.3.9</string>
\\ <key>ProductVersion</key>
\\ <string>10.3.9</string>
\\</dict>
\\</plist>
,
Expand All @@ -323,18 +323,18 @@ test "detect" {
\\<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
\\<plist version="1.0">
\\<dict>
\\ <key>ProductBuildVersion</key>
\\ <string>19G68</string>
\\ <key>ProductCopyright</key>
\\ <string>1983-2020 Apple Inc.</string>
\\ <key>ProductName</key>
\\ <string>Mac OS X</string>
\\ <key>ProductUserVisibleVersion</key>
\\ <string>10.15.6</string>
\\ <key>ProductVersion</key>
\\ <string>10.15.6</string>
\\ <key>iOSSupportVersion</key>
\\ <string>13.6</string>
\\ <key>ProductBuildVersion</key>
\\ <string>19G68</string>
\\ <key>ProductCopyright</key>
\\ <string>1983-2020 Apple Inc.</string>
\\ <key>ProductName</key>
\\ <string>Mac OS X</string>
\\ <key>ProductUserVisibleVersion</key>
\\ <string>10.15.6</string>
\\ <key>ProductVersion</key>
\\ <string>10.15.6</string>
\\ <key>iOSSupportVersion</key>
\\ <string>13.6</string>
\\</dict>
\\</plist>
,
Expand All @@ -345,18 +345,18 @@ test "detect" {
\\<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
\\<plist version="1.0">
\\<dict>
\\ <key>ProductBuildVersion</key>
\\ <string>20A2408</string>
\\ <key>ProductCopyright</key>
\\ <string>1983-2020 Apple Inc.</string>
\\ <key>ProductName</key>
\\ <string>macOS</string>
\\ <key>ProductUserVisibleVersion</key>
\\ <string>11.0</string>
\\ <key>ProductVersion</key>
\\ <string>11.0</string>
\\ <key>iOSSupportVersion</key>
\\ <string>14.2</string>
\\ <key>ProductBuildVersion</key>
\\ <string>20A2408</string>
\\ <key>ProductCopyright</key>
\\ <string>1983-2020 Apple Inc.</string>
\\ <key>ProductName</key>
\\ <string>macOS</string>
\\ <key>ProductUserVisibleVersion</key>
\\ <string>11.0</string>
\\ <key>ProductVersion</key>
\\ <string>11.0</string>
\\ <key>iOSSupportVersion</key>
\\ <string>14.2</string>
\\</dict>
\\</plist>
,
Expand All @@ -367,18 +367,18 @@ test "detect" {
\\<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
\\<plist version="1.0">
\\<dict>
\\ <key>ProductBuildVersion</key>
\\ <string>20C63</string>
\\ <key>ProductCopyright</key>
\\ <string>1983-2020 Apple Inc.</string>
\\ <key>ProductName</key>
\\ <string>macOS</string>
\\ <key>ProductUserVisibleVersion</key>
\\ <string>11.1</string>
\\ <key>ProductVersion</key>
\\ <string>11.1</string>
\\ <key>iOSSupportVersion</key>
\\ <string>14.3</string>
\\ <key>ProductBuildVersion</key>
\\ <string>20C63</string>
\\ <key>ProductCopyright</key>
\\ <string>1983-2020 Apple Inc.</string>
\\ <key>ProductName</key>
\\ <string>macOS</string>
\\ <key>ProductUserVisibleVersion</key>
\\ <string>11.1</string>
\\ <key>ProductVersion</key>
\\ <string>11.1</string>
\\ <key>iOSSupportVersion</key>
\\ <string>14.3</string>
\\</dict>
\\</plist>
,
Expand Down
Loading