Skip to content

Commit

Permalink
[clang] Inject tokens containing #embed back into token stream (llvm#97274)
Browse files Browse the repository at this point in the history

Summary:
Instead of playing "whack-a-mole" with every place where #embed should be
expanded as a comma-separated list, inject each byte back into the token
stream as its own token, with consecutive tokens separated by commas.

Test Plan: 

Reviewers: 

Subscribers: 

Tasks: 

Tags: 


Differential Revision: https://phabricator.intern.facebook.com/D59822402
  • Loading branch information
Fznamznon authored and sayhaan committed Jul 16, 2024
1 parent b75cc18 commit 9d14df4
Show file tree
Hide file tree
Showing 10 changed files with 63 additions and 79 deletions.
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/TokenKinds.def
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,9 @@ TOK(raw_identifier) // Used only in raw lexing mode.
// C99 6.4.4.2: Floating Constants
TOK(numeric_constant) // 0x123

// Directly holds a numerical value. Used to process C23 #embed.
TOK(binary_data)

// C99 6.4.4: Character Constants
TOK(char_constant) // 'a'
TOK(wide_char_constant) // L'b'
Expand Down
2 changes: 1 addition & 1 deletion clang/include/clang/Basic/TokenKinds.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ inline bool isLiteral(TokenKind K) {
return K == tok::numeric_constant || K == tok::char_constant ||
K == tok::wide_char_constant || K == tok::utf8_char_constant ||
K == tok::utf16_char_constant || K == tok::utf32_char_constant ||
isStringLiteral(K) || K == tok::header_name;
isStringLiteral(K) || K == tok::header_name || K == tok::binary_data;
}

/// Return true if this is any of tok::annot_* kinds.
Expand Down
7 changes: 4 additions & 3 deletions clang/include/clang/Lex/Preprocessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -2123,17 +2123,18 @@ class Preprocessor {
char
getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
bool *Invalid = nullptr) const {
assert(Tok.is(tok::numeric_constant) &&
assert((Tok.is(tok::numeric_constant) || Tok.is(tok::binary_data)) &&
Tok.getLength() == 1 && "Called on unsupported token");
assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");

// If the token is carrying a literal data pointer, just use it.
if (const char *D = Tok.getLiteralData())
return *D;
return (Tok.getKind() == tok::binary_data) ? *D : *D - '0';

assert(Tok.is(tok::numeric_constant) && "binary data with no data");
// Otherwise, fall back on getCharacterData, which is slower, but always
// works.
return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid) - '0';
}

/// Retrieve the name of the immediate macro expansion.
Expand Down
3 changes: 1 addition & 2 deletions clang/include/clang/Parse/Parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -2127,7 +2127,7 @@ class Parser : public CodeCompletionHandler {
};
ExprResult ParseInitializerWithPotentialDesignator(DesignatorCompletionInfo);
ExprResult createEmbedExpr();
void ExpandEmbedDirective(SmallVectorImpl<Expr *> &Exprs);
void injectEmbedTokens();

//===--------------------------------------------------------------------===//
// clang Expressions
Expand Down Expand Up @@ -3834,7 +3834,6 @@ class Parser : public CodeCompletionHandler {
AnnotateTemplateIdTokenAsType(CXXScopeSpec &SS,
ImplicitTypenameContext AllowImplicitTypename,
bool IsClassName = false);
void ExpandEmbedIntoTemplateArgList(TemplateArgList &TemplateArgs);
bool ParseTemplateArgumentList(TemplateArgList &TemplateArgs,
TemplateTy Template, SourceLocation OpenLoc);
ParsedTemplateArgument ParseTemplateTemplateArgument();
Expand Down
55 changes: 26 additions & 29 deletions clang/lib/Parse/ParseExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1099,6 +1099,7 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,

// primary-expression
case tok::numeric_constant:
case tok::binary_data:
// constant: integer-constant
// constant: floating-constant

Expand Down Expand Up @@ -1148,18 +1149,9 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind,
}

case tok::annot_embed: {
// We've met #embed in a context where a single value is expected. Take last
// element from #embed data as if it were a comma expression.
EmbedAnnotationData *Data =
reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
SourceLocation StartLoc = ConsumeAnnotationToken();
ASTContext &Context = Actions.getASTContext();
Res = IntegerLiteral::Create(Context,
llvm::APInt(CHAR_BIT, Data->BinaryData.back()),
Context.UnsignedCharTy, StartLoc);
if (Data->BinaryData.size() > 1)
Diag(StartLoc, diag::warn_unused_comma_left_operand);
break;
injectEmbedTokens();
return ParseCastExpression(ParseKind, isAddressOfOperand, isTypeCast,
isVectorLiteral, NotPrimaryExpression);
}

case tok::kw___super:
Expand Down Expand Up @@ -3584,15 +3576,29 @@ ExprResult Parser::ParseFoldExpression(ExprResult LHS,
T.getCloseLocation());
}

void Parser::ExpandEmbedDirective(SmallVectorImpl<Expr *> &Exprs) {
void Parser::injectEmbedTokens() {
EmbedAnnotationData *Data =
reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
SourceLocation StartLoc = ConsumeAnnotationToken();
ASTContext &Context = Actions.getASTContext();
for (auto Byte : Data->BinaryData) {
Exprs.push_back(IntegerLiteral::Create(Context, llvm::APInt(CHAR_BIT, Byte),
Context.UnsignedCharTy, StartLoc));
}
MutableArrayRef<Token> Toks(PP.getPreprocessorAllocator().Allocate<Token>(
Data->BinaryData.size() * 2 - 1),
Data->BinaryData.size() * 2 - 1);
unsigned I = 0;
for (auto &Byte : Data->BinaryData) {
Toks[I].startToken();
Toks[I].setKind(tok::binary_data);
Toks[I].setLocation(Tok.getLocation());
Toks[I].setLength(1);
Toks[I].setLiteralData(&Byte);
if (I != ((Data->BinaryData.size() - 1) * 2)) {
Toks[I + 1].startToken();
Toks[I + 1].setKind(tok::comma);
Toks[I + 1].setLocation(Tok.getLocation());
}
I += 2;
}
PP.EnterTokenStream(std::move(Toks), /*DisableMacroExpansion=*/true,
/*IsReinject=*/false);
ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true);
}

/// ParseExpressionList - Used for C/C++ (argument-)expression-list.
Expand Down Expand Up @@ -3630,17 +3636,8 @@ bool Parser::ParseExpressionList(SmallVectorImpl<Expr *> &Exprs,
if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) {
Diag(Tok, diag::warn_cxx98_compat_generalized_initializer_lists);
Expr = ParseBraceInitializer();
} else if (Tok.is(tok::annot_embed)) {
ExpandEmbedDirective(Exprs);
if (Tok.isNot(tok::comma))
break;
Token Comma = Tok;
ConsumeToken();
checkPotentialAngleBracketDelimiter(Comma);
continue;
} else {
} else
Expr = ParseAssignmentExpression();
}

if (EarlyTypoCorrection)
Expr = Actions.CorrectDelayedTyposInExpr(Expr);
Expand Down
41 changes: 12 additions & 29 deletions clang/lib/Parse/ParseTemplate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1523,19 +1523,6 @@ ParsedTemplateArgument Parser::ParseTemplateArgument() {
ExprArg.get(), Loc);
}

void Parser::ExpandEmbedIntoTemplateArgList(TemplateArgList &TemplateArgs) {
EmbedAnnotationData *Data =
reinterpret_cast<EmbedAnnotationData *>(Tok.getAnnotationValue());
SourceLocation StartLoc = ConsumeAnnotationToken();
ASTContext &Context = Actions.getASTContext();
for (auto Byte : Data->BinaryData) {
Expr *E = IntegerLiteral::Create(Context, llvm::APInt(CHAR_BIT, Byte),
Context.UnsignedCharTy, StartLoc);
TemplateArgs.push_back(
ParsedTemplateArgument(ParsedTemplateArgument::NonType, E, StartLoc));
}
}

/// ParseTemplateArgumentList - Parse a C++ template-argument-list
/// (C++ [temp.names]). Returns true if there was an error.
///
Expand All @@ -1560,24 +1547,20 @@ bool Parser::ParseTemplateArgumentList(TemplateArgList &TemplateArgs,

do {
PreferredType.enterFunctionArgument(Tok.getLocation(), RunSignatureHelp);
if (Tok.is(tok::annot_embed)) {
ExpandEmbedIntoTemplateArgList(TemplateArgs);
} else {
ParsedTemplateArgument Arg = ParseTemplateArgument();
SourceLocation EllipsisLoc;
if (TryConsumeToken(tok::ellipsis, EllipsisLoc))
Arg = Actions.ActOnPackExpansion(Arg, EllipsisLoc);

if (Arg.isInvalid()) {
if (PP.isCodeCompletionReached() && !CalledSignatureHelp)
RunSignatureHelp();
return true;
}

// Save this template argument.
TemplateArgs.push_back(Arg);
ParsedTemplateArgument Arg = ParseTemplateArgument();
SourceLocation EllipsisLoc;
if (TryConsumeToken(tok::ellipsis, EllipsisLoc))
Arg = Actions.ActOnPackExpansion(Arg, EllipsisLoc);

if (Arg.isInvalid()) {
if (PP.isCodeCompletionReached() && !CalledSignatureHelp)
RunSignatureHelp();
return true;
}

// Save this template argument.
TemplateArgs.push_back(Arg);

// If the next token is a comma, consume it and keep reading
// arguments.
} while (TryConsumeToken(tok::comma));
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/Sema/SemaExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3643,9 +3643,9 @@ bool Sema::CheckLoopHintExpr(Expr *E, SourceLocation Loc, bool AllowZero) {
ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) {
// Fast path for a single digit (which is quite common). A single digit
// cannot have a trigraph, escaped newline, radix prefix, or suffix.
if (Tok.getLength() == 1) {
if (Tok.getLength() == 1 || Tok.getKind() == tok::binary_data) {
const char Val = PP.getSpellingOfSingleCharacterNumericConstant(Tok);
return ActOnIntegerConstant(Tok.getLocation(), Val-'0');
return ActOnIntegerConstant(Tok.getLocation(), Val);
}

SmallString<128> SpellingBuffer;
Expand Down
3 changes: 2 additions & 1 deletion clang/test/Preprocessor/embed_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ a
};

// CHECK: store i32 107, ptr %b, align 4
int b =
int b = (
#embed<jk.txt>
)
;


Expand Down
3 changes: 2 additions & 1 deletion clang/test/Preprocessor/embed_constexpr.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -Wno-c23-extensions
// RUN: %clang_cc1 %s -fsyntax-only --embed-dir=%S/Inputs -verify -fexperimental-new-constant-interpreter -Wno-c23-extensions
// expected-no-diagnostics

constexpr int value(int a, int b) {
return a + b;
Expand Down Expand Up @@ -46,7 +47,7 @@ int array[
static_assert(sizeof(array) / sizeof(int) == 'j');

constexpr int comma_expr = (
#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}}
#embed <jk.txt>
);
static_assert(comma_expr == 'k');

Expand Down
21 changes: 10 additions & 11 deletions clang/test/Preprocessor/embed_weird.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,17 @@ _Static_assert(
_Static_assert(sizeof(
#embed <single_byte.txt>
) ==
sizeof(unsigned char)
sizeof(int)
, ""
);
_Static_assert(sizeof
#embed <single_byte.txt>
, ""
);
_Static_assert(sizeof(
#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}}
#embed <jk.txt>
) ==
sizeof(unsigned char)
sizeof(int)
, ""
);

Expand Down Expand Up @@ -73,10 +73,10 @@ void do_stuff() {
// Ensure that we don't accidentally allow you to initialize an unsigned char *
// from embedded data; the data is modeled as a string literal internally, but
// is not actually a string literal.
const unsigned char *ptr =
const unsigned char *ptr = (
#embed <jk.txt> // expected-warning {{left operand of comma operator has no effect}}
; // c-error@-2 {{incompatible integer to pointer conversion initializing 'const unsigned char *' with an expression of type 'unsigned char'}} \
cxx-error@-2 {{cannot initialize a variable of type 'const unsigned char *' with an rvalue of type 'unsigned char'}}
); // c-error@-2 {{incompatible integer to pointer conversion initializing 'const unsigned char *' with an expression of type 'int'}} \
cxx-error@-2 {{cannot initialize a variable of type 'const unsigned char *' with an rvalue of type 'int'}}

// However, there are some cases where this is fine and should work.
const unsigned char *null_ptr_1 =
Expand All @@ -101,11 +101,10 @@ constexpr unsigned char ch =
;
static_assert(ch == 0);

void foobar(float x, char y, char z); // cxx-note {{candidate function not viable: requires 3 arguments, but 1 was provided}}
// c-note@-1 {{declared here}}
void g1() { foobar((float) // cxx-error {{no matching function for call to 'foobar'}}
#embed "numbers.txt" limit(3) // expected-warning {{left operand of comma operator has no effect}}
); // c-error {{too few arguments to function call, expected 3, have 1}}
void foobar(float x, char y, char z);
void g1() { foobar((float)
#embed "numbers.txt" limit(3)
);
}

#if __cplusplus
Expand Down

0 comments on commit 9d14df4

Please sign in to comment.