Skip to content

Commit

Permalink
Parser: add -fdollars-in-identifiers and -fno-dollars-in-identifiers …
Browse files Browse the repository at this point in the history
…option

- add -f(no-)dollars-in-identifiers option
- add warnings related to -W(no-)dollar-in-identifier-extension
- the dollar symbol '$' is no longer accepted in plain identifier tokens; it is only tokenized as part of an extended_identifier
- emit an error when an identifier is followed by a dollar symbol in
  -fno-dollars-in-identifiers mode
  • Loading branch information
iddev5 authored Dec 4, 2021
1 parent 407a3ea commit 086d4bb
Show file tree
Hide file tree
Showing 6 changed files with 82 additions and 35 deletions.
12 changes: 12 additions & 0 deletions src/Diagnostics.zig
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ pub const Options = struct {
@"unicode-zero-width": ?Kind = null,
@"unicode-homoglyph": ?Kind = null,
@"return-type": ?Kind = null,
@"dollar-in-identifier-extension": ?Kind = null,
};

const messages = struct {
Expand Down Expand Up @@ -1266,6 +1267,16 @@ const messages = struct {
const extra = .str;
const kind = .@"error";
};
/// Diagnostic added by `Parser.eatIdentifier` when an extended identifier
/// token contains a '$'; it is reported at the position of the first '$'.
/// The default kind is `.off`. `opt` carries the same name as the
/// `@"dollar-in-identifier-extension"` field of `Options`.
const dollar_in_identifier_extension = struct {
const msg = "'$' in identifier";
const opt = "dollar-in-identifier-extension";
const kind = .off;
// Name of a `LangOpts` field. `tagKind` looks it up with `@field` and
// returns `.off` for this diagnostic whenever that field is false.
const suppress_language_option = "dollars_in_identifiers";
};
/// Error reported by `Parser.eatIdentifier` when the `dollars_in_identifiers`
/// language option is disabled and the token right after an identifier is an
/// `.invalid` token whose text starts with '$'. The parser then fails with
/// `error.ParsingFailed`.
const dollars_in_identifiers = struct {
const msg = "illegal character '$' in identifier";
const kind = .@"error";
};
};

list: std.ArrayList(Message),
Expand Down Expand Up @@ -1484,6 +1495,7 @@ fn tagKind(diag: *Diagnostics, tag: Tag) Kind {
}
if (@hasDecl(info, "suppress_version")) if (comp.langopts.standard.atLeast(info.suppress_version)) return .off;
if (@hasDecl(info, "suppress_gnu")) if (comp.langopts.standard.isExplicitGNU()) return .off;
if (@hasDecl(info, "suppress_language_option")) if (!@field(comp.langopts, info.suppress_language_option)) return .off;
if (kind == .@"error" and diag.fatal_errors) kind = .@"fatal error";
return kind;
}
Expand Down
1 change: 1 addition & 0 deletions src/LangOpts.zig
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ const Standard = enum {
standard: Standard = .default,
/// -fshort-enums option, makes enums only take up as much space as they need to hold all the values.
short_enums: bool = false,
dollars_in_identifiers: bool = true,

pub fn setStandard(self: *LangOpts, name: []const u8) error{InvalidStandard}!void {
self.standard = Standard.NameMap.get(name) orelse return error.InvalidStandard;
Expand Down
68 changes: 37 additions & 31 deletions src/Parser.zig
Original file line number Diff line number Diff line change
Expand Up @@ -136,21 +136,41 @@ fn checkIdentifierCodepoint(comp: *Compilation, codepoint: u21, loc: Source.Loca
}

fn eatIdentifier(p: *Parser) !?TokenIndex {
if (p.tok_ids[p.tok_i] == .identifier) {
defer p.tok_i += 1;
return p.tok_i;
} else if (p.tok_ids[p.tok_i] == .extended_identifier) {
defer p.tok_i += 1;
const slice = p.tokSlice(p.tok_i);
var it = std.unicode.Utf8View.initUnchecked(slice).iterator();
var loc = p.pp.tokens.items(.loc)[p.tok_i];
while (it.nextCodepoint()) |c| {
if (try checkIdentifierCodepoint(p.pp.comp, c, loc)) break;
loc.byte_offset += std.unicode.utf8CodepointSequenceLength(c) catch unreachable;
switch (p.tok_ids[p.tok_i]) {
.identifier => {},
.extended_identifier => {
const slice = p.tokSlice(p.tok_i);
var it = std.unicode.Utf8View.initUnchecked(slice).iterator();
var loc = p.pp.tokens.items(.loc)[p.tok_i];

if (mem.indexOfScalar(u8, slice, '$')) |i| {
loc.byte_offset += @intCast(u32, i);
try p.pp.comp.diag.add(.{
.tag = .dollar_in_identifier_extension,
.loc = loc,
});
loc = p.pp.tokens.items(.loc)[p.tok_i];
}

while (it.nextCodepoint()) |c| {
if (try checkIdentifierCodepoint(p.pp.comp, c, loc)) break;
loc.byte_offset += std.unicode.utf8CodepointSequenceLength(c) catch unreachable;
}
},
else => return null,
}
p.tok_i += 1;

// Handle illegal '$' characters in identifiers
if (!p.pp.comp.langopts.dollars_in_identifiers) {
if (p.tok_ids[p.tok_i] == .invalid and p.tokSlice(p.tok_i)[0] == '$') {
try p.err(.dollars_in_identifiers);
p.tok_i += 1;
return error.ParsingFailed;
}
return p.tok_i;
}
return null;

return p.tok_i - 1;
}

fn expectIdentifier(p: *Parser) Error!TokenIndex {
Expand All @@ -161,6 +181,7 @@ fn expectIdentifier(p: *Parser) Error!TokenIndex {
} });
return error.ParsingFailed;
}

return (try p.eatIdentifier()) orelse unreachable;
}

Expand Down Expand Up @@ -1244,25 +1265,10 @@ const InitDeclarator = struct { d: Declarator, initializer: NodeIndex = .none };
fn attribute(p: *Parser) Error!Attribute {
const name_tok = p.tok_i;
switch (p.tok_ids[p.tok_i]) {
.identifier, .keyword_const, .keyword_const1, .keyword_const2 => {},
.extended_identifier => {
const slice = p.tokSlice(p.tok_i);
var it = std.unicode.Utf8View.initUnchecked(slice).iterator();
var loc = p.pp.tokens.items(.loc)[p.tok_i];
while (it.nextCodepoint()) |c| {
if (try checkIdentifierCodepoint(p.pp.comp, c, loc)) break;
loc.byte_offset += std.unicode.utf8CodepointSequenceLength(c) catch unreachable;
}
},
else => {
try p.errExtra(.expected_token, p.tok_i, .{ .tok_id = .{
.expected = .identifier,
.actual = p.tok_ids[p.tok_i],
} });
return error.ParsingFailed;
},
.keyword_const, .keyword_const1, .keyword_const2 => p.tok_i += 1,
else => _ = try p.expectIdentifier(),
}
p.tok_i += 1;

switch (p.tok_ids[p.tok_i]) {
.comma, .r_paren => { // will be consumed in attributeList
return Attribute{ .name = name_tok };
Expand Down
10 changes: 6 additions & 4 deletions src/Tokenizer.zig
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,8 @@ pub const Token = struct {
/// does not check basic character set chars because the tokenizer handles them separately to keep the common
/// case on the fast path
pub fn mayAppearInIdent(comp: *const Compilation, codepoint: u21, where: enum { start, inside }) bool {
if (codepoint == '$') return comp.langopts.dollars_in_identifiers;
if (codepoint < 0x7F) return false;
return switch (where) {
.start => if (comp.langopts.standard.atLeast(.c11))
CharInfo.isC11IdChar(codepoint) and !CharInfo.isC11DisallowedInitialIdChar(codepoint)
Expand Down Expand Up @@ -780,7 +782,7 @@ pub fn next(self: *Tokenizer) Token {
'u' => state = .u,
'U' => state = .U,
'L' => state = .L,
'a'...'t', 'v'...'z', 'A'...'K', 'M'...'T', 'V'...'Z', '_', '$' => state = .identifier,
'a'...'t', 'v'...'z', 'A'...'K', 'M'...'T', 'V'...'Z', '_' => state = .identifier,
'=' => state = .equal,
'!' => state = .bang,
'|' => state = .pipe,
Expand Down Expand Up @@ -854,7 +856,7 @@ pub fn next(self: *Tokenizer) Token {
'1'...'9' => state = .integer_literal,
'\\' => state = .back_slash,
'\t', '\x0B', '\x0C', ' ' => state = .whitespace,
else => if (c > 0x7F and Token.mayAppearInIdent(self.comp, c, .start)) {
else => if (Token.mayAppearInIdent(self.comp, c, .start)) {
state = .extended_identifier;
} else {
id = .invalid;
Expand Down Expand Up @@ -1074,9 +1076,9 @@ pub fn next(self: *Tokenizer) Token {
},
},
.identifier, .extended_identifier => switch (c) {
'a'...'z', 'A'...'Z', '_', '0'...'9', '$' => {},
'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
else => {
if (c <= 0x7F or !Token.mayAppearInIdent(self.comp, c, .inside)) {
if (!Token.mayAppearInIdent(self.comp, c, .inside)) {
id = if (state == .identifier) Token.getTokenId(self.comp, self.buf[start..self.index]) else .extended_identifier;
break;
}
Expand Down
8 changes: 8 additions & 0 deletions src/main.zig
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ const usage =
\\ -fno-color-diagnostics Disable colors in diagnostics
\\ -fshort-enums Use the narrowest possible integer type for enums.
\\ -fno-short-enums Use "int" as the tag type for enums.
\\ -fdollars-in-identifiers
\\ Allow '$' in identifiers
\\ -fno-dollars-in-identifiers
\\ Disallow '$' in identifiers
\\ -I <dir> Add directory to include search path
\\ -isystem Add directory to SYSTEM include search path
\\ -o <file> Write output to <file>
Expand Down Expand Up @@ -143,6 +147,10 @@ fn handleArgs(comp: *Compilation, args: [][]const u8) !void {
comp.langopts.short_enums = true;
} else if (mem.eql(u8, arg, "-fno-short-enums")) {
comp.langopts.short_enums = false;
} else if (mem.eql(u8, arg, "-fdollars-in-identifiers")) {
comp.langopts.dollars_in_identifiers = true;
} else if (mem.eql(u8, arg, "-fno-dollars-in-identifiers")) {
comp.langopts.dollars_in_identifiers = false;
} else if (mem.startsWith(u8, arg, "-I")) {
var path = arg["-I".len..];
if (path.len == 0) {
Expand Down
18 changes: 18 additions & 0 deletions test/cases/dollars in identifiers.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
void foo$() { } /* trailing '$'; before the pragma, not listed in EXPECTED_ERRORS */

void fib() {
int $test; /* leading '$'; before the pragma, not listed in EXPECTED_ERRORS */
}

void ano$ther() {} /* '$' in the middle; before the pragma, not listed in EXPECTED_ERRORS */

#pragma GCC diagnostic warning "-Wdollar-in-identifier-extension"

void identi$fier() {} /* line 11: after the pragma, listed in EXPECTED_ERRORS */

void inside() {
int vari$able; /* line 14: after the pragma, listed in EXPECTED_ERRORS */
}

#define EXPECTED_ERRORS "dollars in identifiers.c:11:12: warning: '$' in identifier" \
"dollars in identifiers.c:14:11: warning: '$' in identifier"

0 comments on commit 086d4bb

Please sign in to comment.