Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add -fdollars-in-identifiers and -fno-dollars-in-identifiers option #152

Merged
merged 3 commits into from
Dec 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/Diagnostics.zig
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ pub const Options = struct {
@"unicode-zero-width": ?Kind = null,
@"unicode-homoglyph": ?Kind = null,
@"return-type": ?Kind = null,
@"dollar-in-identifier-extension": ?Kind = null,
};

const messages = struct {
Expand Down Expand Up @@ -1265,6 +1266,16 @@ const messages = struct {
const extra = .str;
const kind = .@"error";
};
// Diagnostic for a '$' found inside an extended identifier token.
// Off by default; its kind can be overridden through the
// "dollar-in-identifier-extension" option.
const dollar_in_identifier_extension = struct {
const msg = "'$' in identifier";
const opt = "dollar-in-identifier-extension";
const kind = .off;
// Names a boolean field of `comp.langopts`; `tagKind` reports `.off` for
// this tag whenever that field is false.
const suppress_language_option = "dollars_in_identifiers";
};
// Error reported by the parser when an identifier is immediately followed by
// an invalid token starting with '$' while `langopts.dollars_in_identifiers`
// is false.
const dollars_in_identifiers = struct {
const msg = "illegal character '$' in identifier";
const kind = .@"error";
};
};

list: std.ArrayList(Message),
Expand Down Expand Up @@ -1464,6 +1475,7 @@ fn tagKind(diag: *Diagnostics, tag: Tag) Kind {
}
if (@hasDecl(info, "suppress_version")) if (comp.langopts.standard.atLeast(info.suppress_version)) return .off;
if (@hasDecl(info, "suppress_gnu")) if (comp.langopts.standard.isExplicitGNU()) return .off;
if (@hasDecl(info, "suppress_language_option")) if (!@field(comp.langopts, info.suppress_language_option)) return .off;
if (kind == .@"error" and diag.fatal_errors) kind = .@"fatal error";
return kind;
}
Expand Down
1 change: 1 addition & 0 deletions src/LangOpts.zig
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ const Standard = enum {
standard: Standard = .default,
/// -fshort-enums option, makes enums only take up as much space as they need to hold all the values.
short_enums: bool = false,
dollars_in_identifiers: bool = true,

pub fn setStandard(self: *LangOpts, name: []const u8) error{InvalidStandard}!void {
self.standard = Standard.NameMap.get(name) orelse return error.InvalidStandard;
Expand Down
68 changes: 37 additions & 31 deletions src/Parser.zig
Original file line number Diff line number Diff line change
Expand Up @@ -136,21 +136,41 @@ fn checkIdentifierCodepoint(comp: *Compilation, codepoint: u21, loc: Source.Loca
}

fn eatIdentifier(p: *Parser) !?TokenIndex {
if (p.tok_ids[p.tok_i] == .identifier) {
defer p.tok_i += 1;
return p.tok_i;
} else if (p.tok_ids[p.tok_i] == .extended_identifier) {
defer p.tok_i += 1;
const slice = p.tokSlice(p.tok_i);
var it = std.unicode.Utf8View.initUnchecked(slice).iterator();
var loc = p.pp.tokens.items(.loc)[p.tok_i];
while (it.nextCodepoint()) |c| {
if (try checkIdentifierCodepoint(p.pp.comp, c, loc)) break;
loc.byte_offset += std.unicode.utf8CodepointSequenceLength(c) catch unreachable;
switch (p.tok_ids[p.tok_i]) {
.identifier => {},
.extended_identifier => {
const slice = p.tokSlice(p.tok_i);
var it = std.unicode.Utf8View.initUnchecked(slice).iterator();
var loc = p.pp.tokens.items(.loc)[p.tok_i];

if (mem.indexOfScalar(u8, slice, '$')) |i| {
loc.byte_offset += @intCast(u32, i);
try p.pp.comp.diag.add(.{
.tag = .dollar_in_identifier_extension,
.loc = loc,
});
loc = p.pp.tokens.items(.loc)[p.tok_i];
}

while (it.nextCodepoint()) |c| {
if (try checkIdentifierCodepoint(p.pp.comp, c, loc)) break;
loc.byte_offset += std.unicode.utf8CodepointSequenceLength(c) catch unreachable;
}
},
else => return null,
}
p.tok_i += 1;

// Handle illegal '$' characters in identifiers
if (!p.pp.comp.langopts.dollars_in_identifiers) {
if (p.tok_ids[p.tok_i] == .invalid and p.tokSlice(p.tok_i)[0] == '$') {
try p.err(.dollars_in_identifiers);
p.tok_i += 1;
return error.ParsingFailed;
}
return p.tok_i;
}
return null;

return p.tok_i - 1;
}

fn expectIdentifier(p: *Parser) Error!TokenIndex {
Expand All @@ -161,6 +181,7 @@ fn expectIdentifier(p: *Parser) Error!TokenIndex {
} });
return error.ParsingFailed;
}

return (try p.eatIdentifier()) orelse unreachable;
}

Expand Down Expand Up @@ -1248,25 +1269,10 @@ const InitDeclarator = struct { d: Declarator, initializer: NodeIndex = .none };
fn attribute(p: *Parser) Error!Attribute {
const name_tok = p.tok_i;
switch (p.tok_ids[p.tok_i]) {
.identifier, .keyword_const, .keyword_const1, .keyword_const2 => {},
.extended_identifier => {
const slice = p.tokSlice(p.tok_i);
var it = std.unicode.Utf8View.initUnchecked(slice).iterator();
var loc = p.pp.tokens.items(.loc)[p.tok_i];
while (it.nextCodepoint()) |c| {
if (try checkIdentifierCodepoint(p.pp.comp, c, loc)) break;
loc.byte_offset += std.unicode.utf8CodepointSequenceLength(c) catch unreachable;
}
},
else => {
try p.errExtra(.expected_token, p.tok_i, .{ .tok_id = .{
.expected = .identifier,
.actual = p.tok_ids[p.tok_i],
} });
return error.ParsingFailed;
},
.keyword_const, .keyword_const1, .keyword_const2 => p.tok_i += 1,
else => _ = try p.expectIdentifier(),
}
p.tok_i += 1;

switch (p.tok_ids[p.tok_i]) {
.comma, .r_paren => { // will be consumed in attributeList
return Attribute{ .name = name_tok };
Expand Down
10 changes: 6 additions & 4 deletions src/Tokenizer.zig
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,8 @@ pub const Token = struct {
/// does not check basic character set chars (other than '$', which is allowed only when
/// `langopts.dollars_in_identifiers` is set) because the tokenizer handles them separately to keep the common
/// case on the fast path
pub fn mayAppearInIdent(comp: *const Compilation, codepoint: u21, where: enum { start, inside }) bool {
if (codepoint == '$') return comp.langopts.dollars_in_identifiers;
if (codepoint < 0x7F) return false;
return switch (where) {
.start => if (comp.langopts.standard.atLeast(.c11))
CharInfo.isC11IdChar(codepoint) and !CharInfo.isC11DisallowedInitialIdChar(codepoint)
Expand Down Expand Up @@ -771,7 +773,7 @@ pub fn next(self: *Tokenizer) Token {
'u' => state = .u,
'U' => state = .U,
'L' => state = .L,
'a'...'t', 'v'...'z', 'A'...'K', 'M'...'T', 'V'...'Z', '_', '$' => state = .identifier,
'a'...'t', 'v'...'z', 'A'...'K', 'M'...'T', 'V'...'Z', '_' => state = .identifier,
'=' => state = .equal,
'!' => state = .bang,
'|' => state = .pipe,
Expand Down Expand Up @@ -845,7 +847,7 @@ pub fn next(self: *Tokenizer) Token {
'1'...'9' => state = .integer_literal,
'\\' => state = .back_slash,
'\t', '\x0B', '\x0C', ' ' => state = .whitespace,
else => if (c > 0x7F and Token.mayAppearInIdent(self.comp, c, .start)) {
else => if (Token.mayAppearInIdent(self.comp, c, .start)) {
state = .extended_identifier;
} else {
id = .invalid;
Expand Down Expand Up @@ -1053,9 +1055,9 @@ pub fn next(self: *Tokenizer) Token {
},
},
.identifier, .extended_identifier => switch (c) {
'a'...'z', 'A'...'Z', '_', '0'...'9', '$' => {},
'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
else => {
if (c <= 0x7F or !Token.mayAppearInIdent(self.comp, c, .inside)) {
if (!Token.mayAppearInIdent(self.comp, c, .inside)) {
id = if (state == .identifier) Token.getTokenId(self.comp, self.buf[start..self.index]) else .extended_identifier;
break;
}
Expand Down
8 changes: 8 additions & 0 deletions src/main.zig
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ const usage =
\\ -fno-color-diagnostics Disable colors in diagnostics
\\ -fshort-enums Use the narrowest possible integer type for enums.
\\ -fno-short-enums Use "int" as the tag type for enums.
\\ -fdollars-in-identifiers
\\ Allow '$' in identifiers
\\ -fno-dollars-in-identifiers
\\ Disallow '$' in identifiers
\\ -I <dir> Add directory to include search path
\\ -isystem Add directory to SYSTEM include search path
\\ -o <file> Write output to <file>
Expand Down Expand Up @@ -143,6 +147,10 @@ fn handleArgs(comp: *Compilation, args: [][]const u8) !void {
comp.langopts.short_enums = true;
} else if (mem.eql(u8, arg, "-fno-short-enums")) {
comp.langopts.short_enums = false;
} else if (mem.eql(u8, arg, "-fdollars-in-identifiers")) {
comp.langopts.dollars_in_identifiers = true;
} else if (mem.eql(u8, arg, "-fno-dollars-in-identifiers")) {
comp.langopts.dollars_in_identifiers = false;
} else if (mem.startsWith(u8, arg, "-I")) {
var path = arg["-I".len..];
if (path.len == 0) {
Expand Down
18 changes: 18 additions & 0 deletions test/cases/dollars in identifiers.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
void foo$() { }

void fib() {
int $test;
}

void ano$ther() {}

#pragma GCC diagnostic warning "-Wdollar-in-identifier-extension"

void identi$fier() {}

void inside() {
int vari$able;
}

#define EXPECTED_ERRORS "dollars in identifiers.c:11:12: warning: '$' in identifier" \
"dollars in identifiers.c:14:11: warning: '$' in identifier"