From 7050c932f63f9cb9e94636b287887f8241083117 Mon Sep 17 00:00:00 2001 From: ThePhD Date: Thu, 28 Sep 2023 18:31:34 -0400 Subject: [PATCH 001/113] =?UTF-8?q?=E2=9C=A8=20[Sema,=20Driver,=20Lex,=20F?= =?UTF-8?q?rontend]=20Implement=20naive=20#embed=20for=20C23=20and=20C++26?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🛠 [Frontend] Ensure commas inserted by #embed are properly serialized to output --- clang/CMakeLists.txt | 3 +- clang/include/clang/Basic/Builtins.def | 3 + clang/include/clang/Basic/DiagnosticGroups.td | 6 + .../include/clang/Basic/DiagnosticLexKinds.td | 24 +- clang/include/clang/Basic/FileManager.h | 8 +- clang/include/clang/Basic/TokenKinds.def | 7 + clang/include/clang/Driver/Options.td | 16 + .../Frontend/PreprocessorOutputOptions.h | 2 + clang/include/clang/Lex/PPCallbacks.h | 77 ++- clang/include/clang/Lex/Preprocessor.h | 66 ++- clang/include/clang/Lex/PreprocessorOptions.h | 7 + clang/lib/Basic/FileManager.cpp | 8 +- clang/lib/Basic/IdentifierTable.cpp | 3 +- clang/lib/Driver/ToolChains/Clang.cpp | 5 +- clang/lib/Format/FormatToken.h | 2 + clang/lib/Format/TokenAnnotator.cpp | 28 + clang/lib/Frontend/CompilerInvocation.cpp | 19 + clang/lib/Frontend/DependencyFile.cpp | 29 + clang/lib/Frontend/DependencyGraph.cpp | 43 +- clang/lib/Frontend/InitPreprocessor.cpp | 7 + .../lib/Frontend/PrintPreprocessedOutput.cpp | 25 +- .../Frontend/Rewrite/InclusionRewriter.cpp | 13 + clang/lib/Lex/PPCallbacks.cpp | 11 - clang/lib/Lex/PPDirectives.cpp | 500 ++++++++++++++++++ clang/lib/Lex/PPExpressions.cpp | 44 +- clang/lib/Lex/PPMacroExpansion.cpp | 120 +++++ clang/test/Preprocessor/Inputs/jk.txt | 1 + clang/test/Preprocessor/Inputs/media/art.txt | 9 + clang/test/Preprocessor/Inputs/media/empty | 0 .../test/Preprocessor/Inputs/single_byte.txt | 1 + clang/test/Preprocessor/embed___has_embed.c | 34 ++ .../embed___has_embed_supported.c | 24 + .../test/Preprocessor/embed_feature_test.cpp | 13 + 
.../test/Preprocessor/embed_file_not_found.c | 4 + clang/test/Preprocessor/embed_init.c | 28 + .../Preprocessor/embed_parameter_if_empty.c | 16 + .../test/Preprocessor/embed_parameter_limit.c | 15 + .../Preprocessor/embed_parameter_offset.c | 15 + .../Preprocessor/embed_parameter_prefix.c | 15 + .../Preprocessor/embed_parameter_suffix.c | 15 + .../embed_parameter_unrecognized.c | 8 + clang/test/Preprocessor/embed_path_chevron.c | 8 + clang/test/Preprocessor/embed_path_quote.c | 8 + clang/test/Preprocessor/single_byte.txt | 1 + llvm/CMakeLists.txt | 7 + llvm/cmake/modules/GetHostTriple.cmake | 6 +- 46 files changed, 1264 insertions(+), 40 deletions(-) create mode 100644 clang/test/Preprocessor/Inputs/jk.txt create mode 100644 clang/test/Preprocessor/Inputs/media/art.txt create mode 100644 clang/test/Preprocessor/Inputs/media/empty create mode 100644 clang/test/Preprocessor/Inputs/single_byte.txt create mode 100644 clang/test/Preprocessor/embed___has_embed.c create mode 100644 clang/test/Preprocessor/embed___has_embed_supported.c create mode 100644 clang/test/Preprocessor/embed_feature_test.cpp create mode 100644 clang/test/Preprocessor/embed_file_not_found.c create mode 100644 clang/test/Preprocessor/embed_init.c create mode 100644 clang/test/Preprocessor/embed_parameter_if_empty.c create mode 100644 clang/test/Preprocessor/embed_parameter_limit.c create mode 100644 clang/test/Preprocessor/embed_parameter_offset.c create mode 100644 clang/test/Preprocessor/embed_parameter_prefix.c create mode 100644 clang/test/Preprocessor/embed_parameter_suffix.c create mode 100644 clang/test/Preprocessor/embed_parameter_unrecognized.c create mode 100644 clang/test/Preprocessor/embed_path_chevron.c create mode 100644 clang/test/Preprocessor/embed_path_quote.c create mode 100644 clang/test/Preprocessor/single_byte.txt diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 9b52c58be41e7f..1b88905da3b859 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -300,6 
+300,7 @@ configure_file( ${CMAKE_CURRENT_BINARY_DIR}/include/clang/Basic/Version.inc) # Add appropriate flags for GCC +option(CLANG_ENABLE_PEDANTIC "Compile with pedantic enabled." ON) if (LLVM_COMPILER_IS_GCC_COMPATIBLE) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-common -Woverloaded-virtual") if (NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") @@ -307,7 +308,7 @@ if (LLVM_COMPILER_IS_GCC_COMPATIBLE) endif () # Enable -pedantic for Clang even if it's not enabled for LLVM. - if (NOT LLVM_ENABLE_PEDANTIC) + if (NOT LLVM_ENABLE_PEDANTIC AND CLANG_ENABLE_PEDANTIC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pedantic -Wno-long-long") endif () diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index 6ea8484606cfd5..0dfc6456daf059 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -1766,6 +1766,9 @@ BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n") // Arithmetic Fence: to prevent FP reordering and reassociation optimizations LANGBUILTIN(__arithmetic_fence, "v.", "tE", ALL_LANGUAGES) +// preprocessor embed builtin +LANGBUILTIN(__builtin_pp_embed, "v.", "tE", ALL_LANGUAGES) + #undef BUILTIN #undef LIBBUILTIN #undef LANGBUILTIN diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 0b09c002191848..89f6715cebfdc0 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -708,6 +708,12 @@ def ReservedIdAsMacro : DiagGroup<"reserved-macro-identifier">; def ReservedIdAsMacroAlias : DiagGroup<"reserved-id-macro", [ReservedIdAsMacro]>; def RestrictExpansionMacro : DiagGroup<"restrict-expansion">; def FinalMacro : DiagGroup<"final-macro">; +// Warnings about unknown preprocessor parameters (e.g. 
`#embed` and extensions) +def UnsupportedDirective : DiagGroup<"unsupported-directive">; +def UnknownDirectiveParameters : DiagGroup<"unknown-directive-parameters">; +def IgnoredDirectiveParameters : DiagGroup<"ignored-directive-parameters">; +def DirectiveParameters : DiagGroup<"directive-parameters", + [UnknownDirectiveParameters, IgnoredDirectiveParameters]>; // Just silence warnings about -Wstrict-aliasing for now. def : DiagGroup<"strict-aliasing=0">; diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 940cca67368492..4490f40806b034 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -422,6 +422,22 @@ def warn_cxx23_compat_warning_directive : Warning< def warn_c23_compat_warning_directive : Warning< "#warning is incompatible with C standards before C23">, InGroup, DefaultIgnore; +def warn_c23_pp_embed : Warning< + "#embed is a C23 extension">, + InGroup, + DefaultIgnore; +def warn_c23_pp_has_embed : Warning< + "'__has_embed' is a C23 extension">, + InGroup, + DefaultIgnore; +def warn_cxx26_pp_embed : Warning< + "#embed is a C++26 extension">, + InGroup, + DefaultIgnore; +def warn_cxx26_pp_has_embed : Warning< + "'__has_embed' is a C++26 extension">, + InGroup, + DefaultIgnore; def ext_pp_extra_tokens_at_eol : ExtWarn< "extra tokens at end of #%0 directive">, InGroup; @@ -483,7 +499,13 @@ def ext_pp_gnu_line_directive : Extension< def err_pp_invalid_directive : Error< "invalid preprocessing directive%select{|, did you mean '#%1'?}0">; def warn_pp_invalid_directive : Warning< - err_pp_invalid_directive.Summary>, InGroup>; + err_pp_invalid_directive.Summary>, + InGroup; +def warn_pp_unknown_parameter_ignored : Warning< + "unknown%select{| embed}0 preprocessor parameter '%1' ignored">, + InGroup; +def err_pp_unsupported_directive : Error< + "unsupported%select{| embed}0 directive: %1">; def err_pp_directive_required : 
Error< "%0 must be used within a preprocessing directive">; def err_pp_file_not_found : Error<"'%0' file not found">, DefaultFatal; diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index 56cb093dd8c376..c757f8775b425e 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -276,11 +276,13 @@ class FileManager : public RefCountedBase { /// MemoryBuffer if successful, otherwise returning null. llvm::ErrorOr> getBufferForFile(FileEntryRef Entry, bool isVolatile = false, - bool RequiresNullTerminator = true); + bool RequiresNullTerminator = true, + std::optional MaybeLimit = std::nullopt); llvm::ErrorOr> getBufferForFile(StringRef Filename, bool isVolatile = false, - bool RequiresNullTerminator = true) { - return getBufferForFileImpl(Filename, /*FileSize=*/-1, isVolatile, + bool RequiresNullTerminator = true, + std::optional MaybeLimit = std::nullopt) { + return getBufferForFileImpl(Filename, /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1), isVolatile, RequiresNullTerminator); } diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 94db56a9fd5d78..19a66fbb073119 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -126,6 +126,9 @@ PPKEYWORD(error) // C99 6.10.6 - Pragma Directive. PPKEYWORD(pragma) +// C23 & C++26 #embed +PPKEYWORD(embed) + // GNU Extensions. PPKEYWORD(import) PPKEYWORD(include_next) @@ -151,6 +154,10 @@ TOK(eod) // End of preprocessing directive (end of line inside a // directive). TOK(code_completion) // Code completion marker +// #embed speed support +TOK(builtin_embed) + + // C99 6.4.9: Comments. 
TOK(comment) // Comment (only in -E -C[C] mode) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 3f2058a5d4650c..a77a1a5e9aad98 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -114,6 +114,11 @@ def IncludePath_Group : OptionGroup<"">, Group, DocBrief<[{ Flags controlling how ``#include``\s are resolved to files.}]>; +def EmbedPath_Group : OptionGroup<"">, Group, + DocName<"Embed path management">, + DocBrief<[{ +Flags controlling how ``#embed``\s and similar are resolved to files.}]>; + def I_Group : OptionGroup<"">, Group, DocFlatten; def i_Group : OptionGroup<"">, Group, DocFlatten; def clang_i_Group : OptionGroup<"">, Group, DocFlatten; @@ -816,6 +821,14 @@ will be ignored}]>; def L : JoinedOrSeparate<["-"], "L">, Flags<[RenderJoined]>, Group, Visibility<[ClangOption, FlangOption]>, MetaVarName<"">, HelpText<"Add directory to library search path">; +def embed_dir : JoinedOrSeparate<["-"], "embed-dir">, + Flags<[RenderJoined]>, Group, + Visibility<[ClangOption, CC1Option, CC1AsOption, FlangOption, FC1Option]>, + MetaVarName<"">, HelpText<"Add directory to embed search path">; +def embed_dir_EQ : JoinedOrSeparate<["-"], "embed-dir=">, + Flags<[RenderJoined]>, Group, + Visibility<[ClangOption, CC1Option, CC1AsOption, FlangOption, FC1Option]>, + MetaVarName<"">, HelpText<"Add directory to embed search path">; def MD : Flag<["-"], "MD">, Group, HelpText<"Write a depfile containing user and system headers">; def MMD : Flag<["-"], "MMD">, Group, @@ -1353,6 +1366,9 @@ def dD : Flag<["-"], "dD">, Group, Visibility<[ClangOption, CC1Option]> def dI : Flag<["-"], "dI">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Print include directives in -E mode in addition to normal output">, MarshallingInfoFlag>; +def dE : Flag<["-"], "dE">, Group, Visibility<[ClangOption, CC1Option]>, + HelpText<"Print embed directives in -E mode in addition to normal output">, + 
MarshallingInfoFlag>; def dM : Flag<["-"], "dM">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Print macro definitions in -E mode instead of normal output">; def dead__strip : Flag<["-"], "dead_strip">; diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h index db2ec9f2ae2069..3e36db3f8ce46e 100644 --- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h +++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h @@ -22,6 +22,7 @@ class PreprocessorOutputOptions { unsigned ShowMacroComments : 1; ///< Show comments, even in macros. unsigned ShowMacros : 1; ///< Print macro definitions. unsigned ShowIncludeDirectives : 1; ///< Print includes, imports etc. within preprocessed output. + unsigned ShowEmbedDirectives : 1; ///< Print embeds, etc. within preprocessed output. unsigned RewriteIncludes : 1; ///< Preprocess include directives only. unsigned RewriteImports : 1; ///< Include contents of transitively-imported modules. unsigned MinimizeWhitespace : 1; ///< Ignore whitespace from input. @@ -37,6 +38,7 @@ class PreprocessorOutputOptions { ShowMacroComments = 0; ShowMacros = 0; ShowIncludeDirectives = 0; + ShowEmbedDirectives = 0; RewriteIncludes = 0; RewriteImports = 0; MinimizeWhitespace = 0; diff --git a/clang/include/clang/Lex/PPCallbacks.h b/clang/include/clang/Lex/PPCallbacks.h index 94f96cf9c51254..921bf159ead570 100644 --- a/clang/include/clang/Lex/PPCallbacks.h +++ b/clang/include/clang/Lex/PPCallbacks.h @@ -83,6 +83,47 @@ class PPCallbacks { const Token &FilenameTok, SrcMgr::CharacteristicKind FileType) {} + /// Callback invoked whenever the preprocessor cannot find a file for an + /// embed directive. + /// + /// \param FileName The name of the file being included, as written in the + /// source code. + /// + /// \returns true to indicate that the preprocessor should skip this file + /// and not issue any diagnostic. 
+ virtual bool EmbedFileNotFound(StringRef FileName) { return false; } + + /// Callback invoked whenever an embed directive has been processed, + /// regardless of whether the embed will actually find a file. + /// + /// \param HashLoc The location of the '#' that starts the embed directive. + /// + /// \param FileName The name of the file being included, as written in the + /// source code. + /// + /// \param IsAngled Whether the file name was enclosed in angle brackets; + /// otherwise, it was enclosed in quotes. + /// + /// \param FilenameRange The character range of the quotes or angle brackets + /// for the written file name. + /// + /// \param ParametersRange The character range of the embed parameters. An + /// empty range if there were no parameters. + /// + /// \param File The actual file that may be included by this embed directive. + /// + /// \param SearchPath Contains the search path which was used to find the file + /// in the file system. If the file was found via an absolute path, + /// SearchPath will be empty. + /// + /// \param RelativePath The path relative to SearchPath, at which the resource + /// file was found. This is equal to FileName. + virtual void EmbedDirective(SourceLocation HashLoc, StringRef FileName, + bool IsAngled, CharSourceRange FilenameRange, + CharSourceRange ParametersRange, + OptionalFileEntryRef File, StringRef SearchPath, + StringRef RelativePath) {} + /// Callback invoked whenever the preprocessor cannot find a file for an /// inclusion directive. /// @@ -330,11 +371,15 @@ class PPCallbacks { SourceRange Range) { } + /// Hook called when a '__has_embed' directive is read. + virtual void HasEmbed(SourceLocation Loc, StringRef FileName, bool IsAngled, + OptionalFileEntryRef File) {} + /// Hook called when a '__has_include' or '__has_include_next' directive is /// read. 
virtual void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled, OptionalFileEntryRef File, - SrcMgr::CharacteristicKind FileType); + SrcMgr::CharacteristicKind FileType) {} /// Hook called when a source range is skipped. /// \param Range The SourceRange that was skipped. The range begins at the @@ -461,6 +506,25 @@ class PPChainedCallbacks : public PPCallbacks { Second->FileSkipped(SkippedFile, FilenameTok, FileType); } + bool EmbedFileNotFound(StringRef FileName) override { + bool Skip = First->EmbedFileNotFound(FileName); + // Make sure to invoke the second callback, no matter if the first already + // returned true to skip the file. + Skip |= Second->EmbedFileNotFound(FileName); + return Skip; + } + + void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, + CharSourceRange ParametersRange, + OptionalFileEntryRef File, StringRef SearchPath, + StringRef RelativePath) override { + First->EmbedDirective(HashLoc, FileName, IsAngled, FilenameRange, + ParametersRange, File, SearchPath, RelativePath); + Second->EmbedDirective(HashLoc, FileName, IsAngled, FilenameRange, + ParametersRange, File, SearchPath, RelativePath); + } + bool FileNotFound(StringRef FileName) override { bool Skip = First->FileNotFound(FileName); // Make sure to invoke the second callback, no matter if the first already @@ -561,9 +625,18 @@ class PPChainedCallbacks : public PPCallbacks { Second->PragmaDiagnostic(Loc, Namespace, mapping, Str); } + void HasEmbed(SourceLocation Loc, StringRef FileName, bool IsAngled, + OptionalFileEntryRef File) override { + First->HasEmbed(Loc, FileName, IsAngled, File); + Second->HasEmbed(Loc, FileName, IsAngled, File); + } + void HasInclude(SourceLocation Loc, StringRef FileName, bool IsAngled, OptionalFileEntryRef File, - SrcMgr::CharacteristicKind FileType) override; + SrcMgr::CharacteristicKind FileType) override { + First->HasInclude(Loc, FileName, IsAngled, File, FileType); + Second->HasInclude(Loc, 
FileName, IsAngled, File, FileType); + } void PragmaOpenCLExtension(SourceLocation NameLoc, const IdentifierInfo *Name, SourceLocation StateLoc, unsigned State) override { diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 18d88407ae12c9..7470bf5882730c 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -31,6 +31,7 @@ #include "clang/Lex/PPCallbacks.h" #include "clang/Lex/Token.h" #include "clang/Lex/TokenLexer.h" +#include "llvm/ADT/APSInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" @@ -53,6 +54,7 @@ #include #include #include +#include #include namespace llvm { @@ -165,6 +167,7 @@ class Preprocessor { IdentifierInfo *Ident__has_builtin; // __has_builtin IdentifierInfo *Ident__has_constexpr_builtin; // __has_constexpr_builtin IdentifierInfo *Ident__has_attribute; // __has_attribute + IdentifierInfo *Ident__has_embed; // __has_embed IdentifierInfo *Ident__has_include; // __has_include IdentifierInfo *Ident__has_include_next; // __has_include_next IdentifierInfo *Ident__has_warning; // __has_warning @@ -206,7 +209,10 @@ class Preprocessor { enum { /// Maximum depth of \#includes. - MaxAllowedIncludeStackDepth = 200 + MaxAllowedIncludeStackDepth = 200, + VALUE__STDC_EMBED_NOT_FOUND__ = 0, + VALUE__STDC_EMBED_FOUND__ = 1, + VALUE__STDC_EMBED_EMPTY__ = 2, }; // State that is set before the preprocessor begins. @@ -1728,6 +1734,22 @@ class Preprocessor { /// Lex a token, forming a header-name token if possible. 
bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true); + struct LexEmbedParametersResult { + bool Successful; + std::optional MaybeLimitParam; + std::optional MaybeOffsetParam; + std::optional> MaybeIfEmptyParam; + std::optional> MaybePrefixParam; + std::optional> MaybeSuffixParam; + int UnrecognizedParams; + SourceLocation StartLoc; + SourceLocation EndLoc; + }; + + LexEmbedParametersResult LexEmbedParameters(Token &Current, + bool InHasEmbed = false, + bool DiagnoseUnknown = true); + bool LexAfterModuleImport(Token &Result); void CollectPpImportSuffix(SmallVectorImpl &Toks); @@ -2413,6 +2435,17 @@ class Preprocessor { bool *IsFrameworkFound, bool SkipCache = false, bool OpenFile = true, bool CacheFailures = true); + /// Given a "foo" or \ reference, look up the indicated embed resource. + /// + /// Returns std::nullopt on failure. \p isAngled indicates whether the file + /// reference is for system \#include's or not (i.e. using <> instead of ""). + OptionalFileEntryRef + LookupEmbedFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, + bool OpenFile, + const FileEntry *LookupFromFile = nullptr, + SmallVectorImpl *SearchPath = nullptr, + SmallVectorImpl *RelativePath = nullptr); + /// Return true if we're in the top-level file, not in a \#include. bool isInPrimaryFile() const; @@ -2517,6 +2550,9 @@ class Preprocessor { /// Information about the result for evaluating an expression for a /// preprocessor directive. struct DirectiveEvalResult { + /// The integral value of the expression. + std::optional Value; + /// Whether the expression was evaluated as true or not. bool Conditional; @@ -2531,7 +2567,24 @@ class Preprocessor { /// \#if or \#elif directive and return a \p DirectiveEvalResult object. /// /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. 
- DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro); + DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, + bool CheckForEoD = true, + bool Parenthesized = false); + + /// Evaluate an integer constant expression that may occur after a + /// \#if or \#elif directive and return a \p DirectiveEvalResult object. + /// + /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro. + DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, + Token &Tok, + bool CheckForEoD = true, + bool Parenthesized = false); + + /// Process a '__has_embed("path" [, ...])' expression. + /// + /// Returns predefined `__STDC_EMBED_*` macro values if + /// successful. + int EvaluateHasEmbed(Token &Tok, IdentifierInfo *II); /// Process a '__has_include("path")' expression. /// @@ -2679,6 +2732,15 @@ class Preprocessor { const FileEntry *LookupFromFile, StringRef &LookupFilename, SmallVectorImpl &RelativePath, SmallVectorImpl &SearchPath, ModuleMap::KnownHeader &SuggestedModule, bool isAngled); + // Binary data inclusion + void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok, + const FileEntry *LookupFromFile = nullptr); + void HandleEmbedDirectiveNaive( + SourceLocation FilenameTok, LexEmbedParametersResult &Params, + StringRef BinaryContents, const size_t TargetCharWidth); + void HandleEmbedDirectiveBuiltin( + SourceLocation FilenameTok, LexEmbedParametersResult &Params, + StringRef BinaryContents, const size_t TargetCharWidth); // File inclusion. void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok, diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h index 058194bcde72e5..23f3458d79e031 100644 --- a/clang/include/clang/Lex/PreprocessorOptions.h +++ b/clang/include/clang/Lex/PreprocessorOptions.h @@ -167,6 +167,13 @@ class PreprocessorOptions { /// of the specified memory buffer (the second part of each pair). 
std::vector> RemappedFileBuffers; + /// User specified embed entries. + std::vector EmbedEntries; + + /// Whether or not naive expansion should be used all the time for + /// builtin embed + bool NoBuiltinPPEmbed = false; + /// Whether the compiler instance should retain (i.e., not free) /// the buffers associated with remapped files. /// diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp index d16626b1065213..e0e80b5e0fbedb 100644 --- a/clang/lib/Basic/FileManager.cpp +++ b/clang/lib/Basic/FileManager.cpp @@ -537,13 +537,19 @@ void FileManager::fillRealPathName(FileEntry *UFE, llvm::StringRef FileName) { llvm::ErrorOr> FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile, - bool RequiresNullTerminator) { + bool RequiresNullTerminator, + std::optional MaybeLimit) { const FileEntry *Entry = &FE.getFileEntry(); // If the content is living on the file entry, return a reference to it. if (Entry->Content) return llvm::MemoryBuffer::getMemBuffer(Entry->Content->getMemBufferRef()); uint64_t FileSize = Entry->getSize(); + + if (MaybeLimit) + FileSize = *MaybeLimit; + + // If there's a high enough chance that the file have changed since we // got its size, force a stat before opening it. if (isVolatile || Entry->isNamedPipe()) diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index e5599d54554108..d2b5426d27bb3b 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -423,7 +423,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { // case values). Note that this depends on 'if' being null terminated. #define HASH(LEN, FIRST, THIRD) \ - (LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31) + (LEN << 6) + (((FIRST-'a') - (THIRD-'a')) & 63) #define CASE(LEN, FIRST, THIRD, NAME) \ case HASH(LEN, FIRST, THIRD): \ return memcmp(Name, #NAME, LEN) ? 
tok::pp_not_keyword : tok::pp_ ## NAME @@ -438,6 +438,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { CASE( 4, 'e', 's', else); CASE( 4, 'l', 'n', line); CASE( 4, 's', 'c', sccs); + CASE( 5, 'e', 'b', embed); CASE( 5, 'e', 'd', endif); CASE( 5, 'e', 'r', error); CASE( 5, 'i', 'e', ident); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index b91126ebed0186..fc2f749a34fc47 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1324,7 +1324,7 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, Args.addAllArgs(CmdArgs, {options::OPT_D, options::OPT_U, options::OPT_I_Group, - options::OPT_F, options::OPT_index_header_map}); + options::OPT_F, options::OPT_index_header_map, options::OPT_EmbedPath_Group}); // Add -Wp, and -Xpreprocessor if using the preprocessor. @@ -8182,6 +8182,9 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, // Pass along any -I options so we get proper .include search paths. Args.AddAllArgs(CmdArgs, options::OPT_I_Group); + // Pass along any -embed-dir or similar options so we get proper embed paths. + Args.AddAllArgs(CmdArgs, options::OPT_EmbedPath_Group); + // Determine the original source input. 
auto FindSource = [](const Action *S) -> const Action * { while (S->getKind() != Action::InputClass) { diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 606e9e790ad833..232626e783e1b7 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -1008,6 +1008,7 @@ struct AdditionalKeywords { kw_synchronized = &IdentTable.get("synchronized"); kw_throws = &IdentTable.get("throws"); kw___except = &IdentTable.get("__except"); + kw___has_embed = &IdentTable.get("__has_embed"); kw___has_include = &IdentTable.get("__has_include"); kw___has_include_next = &IdentTable.get("__has_include_next"); @@ -1305,6 +1306,7 @@ struct AdditionalKeywords { IdentifierInfo *kw_NS_ERROR_ENUM; IdentifierInfo *kw_NS_OPTIONS; IdentifierInfo *kw___except; + IdentifierInfo *kw___has_embed; IdentifierInfo *kw___has_include; IdentifierInfo *kw___has_include_next; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 543c119620bf28..e405a9085951dc 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1400,6 +1400,9 @@ class AnnotatingParser { Keywords.kw___has_include_next)) { parseHasInclude(); } + else if (Tok->is(Keywords.kw___has_embed)) { + parseHasEmbed(); + } if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next && Tok->Next->isNot(tok::l_paren)) { Tok->setType(TT_CSharpGenericTypeConstraint); @@ -1464,6 +1467,21 @@ class AnnotatingParser { } } + void parseEmbedDirective() { + if (CurrentToken && CurrentToken->is(tok::less)) { + next(); + while (CurrentToken) { + // Mark tokens up to the trailing line comments as implicit string + // literals. 
+ if (CurrentToken->isNot(tok::comment) && + !CurrentToken->TokenText.startswith("//")) { + CurrentToken->setType(TT_ImplicitStringLiteral); + } + next(); + } + } + } + void parseWarningOrError() { next(); // We still want to format the whitespace left of the first token of the @@ -1500,6 +1518,14 @@ class AnnotatingParser { next(); // ')' } + void parseHasEmbed() { + if (!CurrentToken || CurrentToken->isNot(tok::l_paren)) + return; + next(); // '(' + parseEmbedDirective(); + next(); // ')' + } + LineType parsePreprocessorDirective() { bool IsFirstToken = CurrentToken->IsFirst; LineType Type = LT_PreprocessorDirective; @@ -1563,6 +1589,8 @@ class AnnotatingParser { } else if (Tok->isOneOf(Keywords.kw___has_include, Keywords.kw___has_include_next)) { parseHasInclude(); + } else if (Tok->is(Keywords.kw___has_embed)) { + parseHasEmbed(); } } return Type; diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index bb442495f58359..05406b5d42d738 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -4302,6 +4302,12 @@ static void GeneratePreprocessorArgs(const PreprocessorOptions &Opts, if (Opts.SourceDateEpoch) GenerateArg(Consumer, OPT_source_date_epoch, Twine(*Opts.SourceDateEpoch)); + for (const auto &EmbedEntry : Opts.EmbedEntries) + GenerateArg(Consumer, OPT_embed_dir, EmbedEntry); + + if (Opts.NoBuiltinPPEmbed) + GenerateArg(Consumer, OPT_fno_builtin_, "pp_embed"); + // Don't handle LexEditorPlaceholders. It is implied by the action that is // generated elsewhere. 
} @@ -4394,6 +4400,19 @@ static bool ParsePreprocessorArgs(PreprocessorOptions &Opts, ArgList &Args, } } + for (const auto *A : Args.filtered(OPT_embed_dir, OPT_embed_dir_EQ)) { + StringRef Val = A->getValue(); + Opts.EmbedEntries.push_back(std::string(Val)); + } + + // Can disable the internal embed builtin / token + for (const auto *A : Args.filtered(OPT_fno_builtin, OPT_fno_builtin_)) { + StringRef Val = A->getValue(); + if (Val == "pp_embed") { + Opts.NoBuiltinPPEmbed = true; + } + } + // Always avoid lexing editor placeholders when we're just running the // preprocessor as we never want to emit the // "editor placeholder in source file" error in PP only mode. diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp index c2f6f41ae291ef..10558b1d34bf62 100644 --- a/clang/lib/Frontend/DependencyFile.cpp +++ b/clang/lib/Frontend/DependencyFile.cpp @@ -65,6 +65,21 @@ struct DepCollectorPPCallbacks : public PPCallbacks { /*IsMissing=*/false); } + void EmbedDirective(SourceLocation HashLoc, + StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, CharSourceRange ParametersRange, + OptionalFileEntryRef File, StringRef SearchPath, + StringRef RelativePath) override { + if (!File) + DepCollector.maybeAddDependency(FileName, + /*FromModule*/ false, + /*IsSystem*/ false, + /*IsModuleFile*/ false, + &PP.getFileManager(), + /*IsMissing*/ true); + // Files that actually exist are handled by FileChanged. + } + void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, @@ -81,6 +96,20 @@ struct DepCollectorPPCallbacks : public PPCallbacks { // Files that actually exist are handled by FileChanged. 
} + void HasEmbed(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled, + OptionalFileEntryRef File) override { + if (!File) + return; + StringRef Filename = + llvm::sys::path::remove_leading_dotslash(File->getName()); + DepCollector.maybeAddDependency(Filename, + /*FromModule=*/false, + /*IsSystem=*/false, + /*IsModuleFile=*/false, + &PP.getFileManager(), + /*IsMissing=*/false); + } + void HasInclude(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled, OptionalFileEntryRef File, SrcMgr::CharacteristicKind FileType) override { diff --git a/clang/lib/Frontend/DependencyGraph.cpp b/clang/lib/Frontend/DependencyGraph.cpp index 6aad04370f6e7a..683f751a94244e 100644 --- a/clang/lib/Frontend/DependencyGraph.cpp +++ b/clang/lib/Frontend/DependencyGraph.cpp @@ -26,6 +26,14 @@ namespace DOT = llvm::DOT; namespace { class DependencyGraphCallback : public PPCallbacks { +public: + enum DirectiveBehavior { + Normal = 0, + IgnoreEmbed = 0b01, + IgnoreInclude = 0b10, + }; + +private: const Preprocessor *PP; std::string OutputFile; std::string SysRoot; @@ -34,6 +42,7 @@ class DependencyGraphCallback : public PPCallbacks { llvm::DenseMap>; DependencyMap Dependencies; + DirectiveBehavior Behavior; private: raw_ostream &writeNodeReference(raw_ostream &OS, @@ -42,7 +51,8 @@ class DependencyGraphCallback : public PPCallbacks { public: DependencyGraphCallback(const Preprocessor *_PP, StringRef OutputFile, - StringRef SysRoot) + StringRef SysRoot, + DirectiveBehavior Action = IgnoreEmbed) - : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) { } + : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()), Behavior(Action) { } void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, @@ -52,6 +62,12 @@ class DependencyGraphCallback : public PPCallbacks { StringRef RelativePath, const Module *Imported, SrcMgr::CharacteristicKind FileType) override; + void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, + CharSourceRange ParametersRange, + OptionalFileEntryRef File, StringRef 
SearchPath, + StringRef RelativePath) override; + void EndOfMainFile() override { OutputGraphFile(); } @@ -70,6 +86,31 @@ void DependencyGraphCallback::InclusionDirective( bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath, const Module *Imported, SrcMgr::CharacteristicKind FileType) { + if ((Behavior & IgnoreInclude) == IgnoreInclude) { + return; + } + if (!File) + return; + + SourceManager &SM = PP->getSourceManager(); + OptionalFileEntryRef FromFile = + SM.getFileEntryRefForID(SM.getFileID(SM.getExpansionLoc(HashLoc))); + if (!FromFile) + return; + + Dependencies[*FromFile].push_back(*File); + + AllFiles.insert(*File); + AllFiles.insert(*FromFile); +} + +void DependencyGraphCallback::EmbedDirective( + SourceLocation HashLoc, StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, CharSourceRange ParametersRange, + OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath) { + if ((Behavior & IgnoreEmbed) == IgnoreEmbed) { + return; + } if (!File) return; diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 846e5fce6de7b2..b7d084773b0a19 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -498,6 +498,11 @@ static void InitializeStandardPredefinedMacros(const TargetInfo &TI, Builder.defineMacro("__STDC_UTF_16__", "1"); Builder.defineMacro("__STDC_UTF_32__", "1"); + // __has_embed definitions + Builder.defineMacro("__STDC_EMBED_NOT_FOUND__", "0"); + Builder.defineMacro("__STDC_EMBED_FOUND__", "1"); + Builder.defineMacro("__STDC_EMBED_EMPTY__", "2"); + if (LangOpts.ObjC) Builder.defineMacro("__OBJC__"); @@ -729,6 +734,8 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, if (LangOpts.Char8) Builder.defineMacro("__cpp_char8_t", "202207L"); Builder.defineMacro("__cpp_impl_destroying_delete", "201806L"); + + Builder.defineMacro("__cpp_pp_embed", 
"202403L"); } /// InitializeOpenCLFeatureTestMacros - Define OpenCL macros based on target diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp index 7f5f6690682300..fb9baa92e6836d 100644 --- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -93,6 +93,7 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { bool DisableLineMarkers; bool DumpDefines; bool DumpIncludeDirectives; + bool DumpEmbedDirectives; bool UseLineDirectives; bool IsFirstFileEntered; bool MinimizeWhitespace; @@ -106,12 +107,13 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { public: PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers, - bool defines, bool DumpIncludeDirectives, + bool defines, bool DumpIncludeDirectives, bool DumpEmbedDirectives, bool UseLineDirectives, bool MinimizeWhitespace, bool DirectivesOnly, bool KeepSystemIncludes) : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os), DisableLineMarkers(lineMarkers), DumpDefines(defines), DumpIncludeDirectives(DumpIncludeDirectives), + DumpEmbedDirectives(DumpEmbedDirectives), UseLineDirectives(UseLineDirectives), MinimizeWhitespace(MinimizeWhitespace), DirectivesOnly(DirectivesOnly), KeepSystemIncludes(KeepSystemIncludes), OrigOS(os) { @@ -149,6 +151,11 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { void FileChanged(SourceLocation Loc, FileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID) override; + void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, + CharSourceRange ParametersRange, + OptionalFileEntryRef File, StringRef SearchPath, + StringRef RelativePath) override; void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, @@ -398,6 +405,20 @@ void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc, } } 
+void PrintPPOutputPPCallbacks::EmbedDirective(
+    SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+    CharSourceRange FilenameRange, CharSourceRange ParametersRange,
+    OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath) {
+  // When embed-directive dumping is enabled, echo the #embed directive itself
+  // (tagged "clang -E -dE") ahead of its content or interpretation.
+  if (DumpEmbedDirectives) {
+    MoveToLine(HashLoc, /*RequireStartOfLine=*/true);
+    *OS << "#embed " << (IsAngled ? '<' : '"') << FileName
+        << (IsAngled ? '>' : '"') << " /* clang -E -dE */";
+    setEmittedDirectiveOnThisLine();
+  }
+}
+
 void PrintPPOutputPPCallbacks::InclusionDirective(
     SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
     bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File,
@@ -981,7 +1002,7 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS,
 
   PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks(
       PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros,
-      Opts.ShowIncludeDirectives, Opts.UseLineDirectives,
+      Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives, Opts.UseLineDirectives,
       Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes);
 
   // Expand macros in pragmas with -fms-extensions. The assumption is that
diff --git a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
index 28f7b0b9edfc5c..4a73946951fd9c 100644
--- a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
+++ b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
@@ -71,6 +71,11 @@ class InclusionRewriter : public PPCallbacks {
                    FileID PrevFID) override;
   void FileSkipped(const FileEntryRef &SkippedFile, const Token &FilenameTok,
                    SrcMgr::CharacteristicKind FileType) override;
+  void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled,
+                      CharSourceRange FilenameRange,
+                      CharSourceRange ParametersRange,
+                      OptionalFileEntryRef File, StringRef SearchPath,
+                      StringRef RelativePath) override;
   void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
                           StringRef FileName, bool IsAngled,
                           CharSourceRange FilenameRange,
@@ -177,6 +182,14 @@ void InclusionRewriter::FileSkipped(const FileEntryRef & /*SkippedFile*/,
   LastInclusionLocation = SourceLocation();
 }
 
+/// This should be called whenever the preprocessor encounters embed
+/// directives. The body is empty: nothing is done with them here.
+void InclusionRewriter::EmbedDirective(
+    SourceLocation /*HashLoc*/, StringRef /*FileName*/, bool /*IsAngled*/,
+    CharSourceRange /*FilenameRange*/, CharSourceRange /*ParametersRange*/,
+    OptionalFileEntryRef /*File*/, StringRef /*SearchPath*/,
+    StringRef /*RelativePath*/) {}
+
 /// This should be called whenever the preprocessor encounters include
 /// directives. It does not say whether the file has been included, but it
 /// provides more information about the directive (hash location instead
diff --git a/clang/lib/Lex/PPCallbacks.cpp b/clang/lib/Lex/PPCallbacks.cpp
index f2b60a728e9017..ea5dce2c27a587 100644
--- a/clang/lib/Lex/PPCallbacks.cpp
+++ b/clang/lib/Lex/PPCallbacks.cpp
@@ -14,16 +14,5 @@ using namespace clang;
 // Out of line key method.
PPCallbacks::~PPCallbacks() = default; -void PPCallbacks::HasInclude(SourceLocation Loc, StringRef FileName, - bool IsAngled, OptionalFileEntryRef File, - SrcMgr::CharacteristicKind FileType) {} - // Out of line key method. PPChainedCallbacks::~PPChainedCallbacks() = default; - -void PPChainedCallbacks::HasInclude(SourceLocation Loc, StringRef FileName, - bool IsAngled, OptionalFileEntryRef File, - SrcMgr::CharacteristicKind FileType) { - First->HasInclude(Loc, FileName, IsAngled, File, FileType); - Second->HasInclude(Loc, FileName, IsAngled, File, FileType); -} diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index e3065c17dc70b4..e0d98d7ca03fa1 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -18,7 +18,9 @@ #include "clang/Basic/Module.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/TargetInfo.h" #include "clang/Basic/TokenKinds.h" +#include "clang/Frontend/FrontendOptions.h" #include "clang/Lex/CodeCompletionHandler.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/LexDiagnostic.h" @@ -1079,6 +1081,101 @@ OptionalFileEntryRef Preprocessor::LookupFile( return std::nullopt; } +OptionalFileEntryRef Preprocessor::LookupEmbedFile( + SourceLocation FilenameLoc, StringRef Filename, bool isAngled, + bool OpenFile, const FileEntry *LookupFromFile, + SmallVectorImpl *SearchPath, SmallVectorImpl *RelativePath) { + FileManager &FM = this->getFileManager(); + if (llvm::sys::path::is_absolute(Filename)) { + // lookup path or immediately fail + llvm::Expected ShouldBeEntry = + FM.getFileRef(Filename, true, OpenFile); + return llvm::expectedToOptional(std::move(ShouldBeEntry)); + } + + // Otherwise, it's search time! 
+  SmallString<512> LookupPath;
+  // Non-angled lookup
+  if (!isAngled) {
+    bool TryLocalLookup = false;
+    if (SearchPath && !SearchPath->empty()) {
+      // a non-empty caller-provided search path is the local lookup path
+      llvm::sys::path::native(*SearchPath, LookupPath);
+      TryLocalLookup = true;
+    } else if (LookupFromFile) {
+      // otherwise look next to the file that contains the directive
+      StringRef FullFileDir = LookupFromFile->tryGetRealPathName();
+      if (!FullFileDir.empty()) {
+        llvm::sys::path::native(FullFileDir, LookupPath);
+        llvm::sys::path::remove_filename(LookupPath);
+        TryLocalLookup = true;
+      }
+    } else {
+      // Cannot do local lookup: give up.
+      TryLocalLookup = false;
+    }
+    if (TryLocalLookup) {
+      if (!LookupPath.empty() &&
+          !llvm::sys::path::is_separator(LookupPath.back())) {
+        LookupPath.append(llvm::sys::path::get_separator());
+      }
+      LookupPath.append(Filename);
+      llvm::Expected ShouldBeEntry =
+          FM.getFileRef(LookupPath, true, OpenFile);
+      if (ShouldBeEntry) {
+        return std::move(*ShouldBeEntry);
+      } else {
+        llvm::consumeError(ShouldBeEntry.takeError());
+      }
+    }
+  }
+
+  if (!isAngled) {
+    // working directory lookup: "<cwd><separator><Filename>"
+    LookupPath.clear();
+    auto MaybeWorkingDirEntry = FM.getDirectoryRef(".");
+    if (MaybeWorkingDirEntry) {
+      StringRef WorkingDir = MaybeWorkingDirEntry->getName();
+      if (!WorkingDir.empty()) {
+        llvm::sys::path::native(WorkingDir, LookupPath);
+        if (!LookupPath.empty() &&
+            !llvm::sys::path::is_separator(LookupPath.back())) {
+          LookupPath.append(llvm::sys::path::get_separator());
+        }
+        LookupPath.append(Filename);
+        llvm::Expected ShouldBeEntry =
+            FM.getFileRef(LookupPath, true, OpenFile);
+        if (ShouldBeEntry) {
+          return std::move(*ShouldBeEntry);
+        } else {
+          llvm::consumeError(ShouldBeEntry.takeError());
+        }
+      }
+    } else {
+      llvm::consumeError(MaybeWorkingDirEntry.takeError());
+    }
+  }
+
+  for (const auto &Entry : PPOpts->EmbedEntries) {
+    LookupPath.clear();
+    llvm::sys::path::native(Entry, LookupPath);
+    if (!LookupPath.empty() &&
+        !llvm::sys::path::is_separator(LookupPath.back())) {
+      LookupPath.append(llvm::sys::path::get_separator());
+    }
+    LookupPath.append(Filename.begin(), Filename.end());
+    llvm::sys::path::native(LookupPath);
+    llvm::Expected ShouldBeEntry =
+        FM.getFileRef(LookupPath, true, OpenFile);
+    if (ShouldBeEntry) {
+      return std::move(*ShouldBeEntry);
+    } else {
+      llvm::consumeError(ShouldBeEntry.takeError());
+    }
+  }
+  return std::nullopt;
+}
+
 //===----------------------------------------------------------------------===//
 // Preprocessor Directive Handling.
 //===----------------------------------------------------------------------===//
@@ -1174,6 +1271,7 @@ void Preprocessor::HandleDirective(Token &Result) {
       case tok::pp_include_next:
       case tok::pp___include_macros:
       case tok::pp_pragma:
+      case tok::pp_embed:
         Diag(Result, diag::err_embedded_directive) << II->getName();
         Diag(*ArgMacro, diag::note_macro_expansion_here)
             << ArgMacro->getIdentifierInfo();
@@ -1288,6 +1386,11 @@ void Preprocessor::HandleDirective(Token &Result) {
       return HandleIdentSCCSDirective(Result);
     case tok::pp_sccs:
       return HandleIdentSCCSDirective(Result);
+    case tok::pp_embed:
+      return HandleEmbedDirective(SavedHash.getLocation(), Result,
+                                  getCurrentFileLexer()
+                                      ? getCurrentFileLexer()->getFileEntry()
+                                      : nullptr);
     case tok::pp_assert:
       //isExtension = true; // FIXME: implement #assert
       break;
@@ -3517,3 +3620,400 @@ void Preprocessor::HandleElifFamilyDirective(Token &ElifToken,
       HashToken.getLocation(), CI.IfLoc, /*Foundnonskip*/ true,
       /*FoundElse*/ CI.FoundElse, ElifToken.getLocation());
 }
+
+enum class BracketType { Brace, Paren, Square };
+
+Preprocessor::LexEmbedParametersResult
+Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed,
+                                 bool DiagnoseUnknown) {
+  LexEmbedParametersResult Result{};
+  SmallString<32> Parameter;
+  SmallVector ParameterTokens;
+  tok::TokenKind EndTokenKind = InHasEmbed ? tok::r_paren : tok::eod;
+  Result.StartLoc = CurTok.getLocation();
+  for (LexNonComment(CurTok); CurTok.isNot(EndTokenKind);) {
+    Parameter.clear();
+    // Lex identifier [:: identifier ...]
+    if (!CurTok.is(tok::identifier)) {
+      Diag(CurTok, diag::err_expected) << "identifier";
+      DiscardUntilEndOfDirective();
+      return Result;
+    }
+    Token ParameterStartTok = CurTok;
+    IdentifierInfo *InitialID = CurTok.getIdentifierInfo();
+    Parameter.append(InitialID->getName());
+    for (LexNonComment(CurTok); CurTok.is(tok::coloncolon);
+         LexNonComment(CurTok)) {
+      Parameter.append("::");
+      LexNonComment(CurTok);
+      if (!CurTok.is(tok::identifier)) {
+        Diag(CurTok, diag::err_expected) << "identifier";
+        DiscardUntilEndOfDirective();
+        return Result;
+      }
+      IdentifierInfo *NextID = CurTok.getIdentifierInfo();
+      Parameter.append(NextID->getName());
+    }
+    // Lex the parameters (dependent on the parameter type we want!)
+    if (Parameter == "limit") {
+      // we have a limit parameter and its internals are processed using
+      // evaluation rules from #if - handle here
+      if (CurTok.isNot(tok::l_paren)) {
+        Diag(CurTok, diag::err_pp_expected_after) << "(" << Parameter;
+        DiscardUntilEndOfDirective();
+        return Result;
+      }
+      IdentifierInfo *ParameterIfNDef = nullptr;
+      DirectiveEvalResult LimitEvalResult =
+          EvaluateDirectiveExpression(ParameterIfNDef, CurTok, false, true);
+      if (!LimitEvalResult.Value) {
+        return Result;
+      }
+      const llvm::APSInt &LimitResult = *LimitEvalResult.Value;
+      // Only results wider than 64 bits or negative results cannot be
+      // represented; an unsigned result of at most 64 bits is used as it is.
+      const bool ValueDoesNotFit =
+          LimitResult.getBitWidth() > 64 ||
+          (LimitResult.isSigned() && LimitResult.isNegative());
+      if (ValueDoesNotFit) {
+        Diag(CurTok, diag::warn_pp_expr_overflow);
+        // just truncate and roll with that, I guess?
+ Result.MaybeLimitParam = + static_cast(LimitResult.getRawData()[0]); + } else { + Result.MaybeLimitParam = + static_cast(LimitResult.getZExtValue()); + } + LexNonComment(CurTok); + } else if (Parameter == "clang::offset") { + // we have a limit parameter and its internals are processed using + // evaluation rules from #if - handle here + if (CurTok.isNot(tok::l_paren)) { + Diag(CurTok, diag::err_pp_expected_after) << "(" << Parameter; + DiscardUntilEndOfDirective(); + return Result; + } + IdentifierInfo *ParameterIfNDef = nullptr; + DirectiveEvalResult OffsetEvalResult = + EvaluateDirectiveExpression(ParameterIfNDef, CurTok, false, true); + if (!OffsetEvalResult.Value) { + return Result; + } + const llvm::APSInt &OffsetResult = *OffsetEvalResult.Value; + if (OffsetResult.getBitWidth() > 64) { + Diag(CurTok, diag::warn_pp_expr_overflow); + // just truncate and roll with that, I guess? + Result.MaybeOffsetParam = + static_cast(OffsetResult.getRawData()[0]); + } else { + Result.MaybeOffsetParam = + static_cast(OffsetResult.getZExtValue()); + } + LexNonComment(CurTok); + } else { + if (CurTok.is(tok::l_paren)) { + SmallVector Brackets; + Brackets.push_back(BracketType::Paren); + auto ParseArgToken = [&]() { + for (LexNonComment(CurTok); CurTok.isNot(tok::eod); + LexNonComment(CurTok)) { + switch (CurTok.getKind()) { + default: + break; + case tok::l_paren: + Brackets.push_back(BracketType::Paren); + break; + case tok::r_paren: + if (Brackets.back() != BracketType::Paren) { + Diag(CurTok, diag::err_pp_expected_rparen); + return false; + } + Brackets.pop_back(); + if (Brackets.empty()) { + return true; + } + break; + case tok::l_brace: + Brackets.push_back(BracketType::Brace); + break; + case tok::r_brace: + if (Brackets.back() != BracketType::Brace) { + Diag(CurTok, diag::err_expected) << "}"; + return false; + } + Brackets.pop_back(); + break; + case tok::l_square: + Brackets.push_back(BracketType::Square); + break; + case tok::r_square: + if (Brackets.back() != 
BracketType::Square) { + Diag(CurTok, diag::err_expected) << "]"; + return false; + } + Brackets.pop_back(); + break; + } + ParameterTokens.push_back(CurTok); + } + if (!Brackets.empty()) { + Diag(CurTok, diag::err_pp_expected_rparen); + DiscardUntilEndOfDirective(); + return false; + } + return true; + }; + if (!ParseArgToken()) { + return Result; + } + if (!CurTok.is(tok::r_paren)) { + Diag(CurTok, diag::err_pp_expected_rparen); + DiscardUntilEndOfDirective(); + return Result; + } + Lex(CurTok); + } + // "Token-soup" parameters + if (Parameter == "if_empty") { + // TODO: integer list optimization + Result.MaybeIfEmptyParam = std::move(ParameterTokens); + } else if (Parameter == "prefix") { + // TODO: integer list optimization + Result.MaybePrefixParam = std::move(ParameterTokens); + } else if (Parameter == "suffix") { + // TODO: integer list optimization + Result.MaybeSuffixParam = std::move(ParameterTokens); + } else { + ++Result.UnrecognizedParams; + if (DiagnoseUnknown) { + Diag(ParameterStartTok, diag::warn_pp_unknown_parameter_ignored) + << 1 << Parameter; + } + } + } + } + Result.Successful = true; + return Result; +} + +// This array must survive for an extended period of time +inline constexpr const char *IntegerLiterals[] = { + "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", + "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", + "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", + "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "43", + "44", "45", "46", "47", "48", "49", "50", "51", "52", "53", "54", + "55", "56", "57", "58", "59", "60", "61", "62", "63", "64", "65", + "66", "67", "68", "69", "70", "71", "72", "73", "74", "75", "76", + "77", "78", "79", "80", "81", "82", "83", "84", "85", "86", "87", + "88", "89", "90", "91", "92", "93", "94", "95", "96", "97", "98", + "99", "100", "101", "102", "103", "104", "105", "106", "107", "108", "109", + "110", "111", "112", "113", "114", "115", "116", "117", 
"118", "119", "120", + "121", "122", "123", "124", "125", "126", "127", "128", "129", "130", "131", + "132", "133", "134", "135", "136", "137", "138", "139", "140", "141", "142", + "143", "144", "145", "146", "147", "148", "149", "150", "151", "152", "153", + "154", "155", "156", "157", "158", "159", "160", "161", "162", "163", "164", + "165", "166", "167", "168", "169", "170", "171", "172", "173", "174", "175", + "176", "177", "178", "179", "180", "181", "182", "183", "184", "185", "186", + "187", "188", "189", "190", "191", "192", "193", "194", "195", "196", "197", + "198", "199", "200", "201", "202", "203", "204", "205", "206", "207", "208", + "209", "210", "211", "212", "213", "214", "215", "216", "217", "218", "219", + "220", "221", "222", "223", "224", "225", "226", "227", "228", "229", "230", + "231", "232", "233", "234", "235", "236", "237", "238", "239", "240", "241", + "242", "243", "244", "245", "246", "247", "248", "249", "250", "251", "252", + "253", "254", "255"}; + +void Preprocessor::HandleEmbedDirectiveNaive(SourceLocation FilenameLoc, + LexEmbedParametersResult &Params, + StringRef BinaryContents, + const size_t TargetCharWidth) { + (void)TargetCharWidth; // for later, when we support various sizes + size_t TokenIndex = 0; + const size_t InitListTokensSize = [&]() { + if (BinaryContents.empty()) { + if (Params.MaybeIfEmptyParam) { + return Params.MaybeIfEmptyParam->size(); + } else { + return static_cast(0); + } + } else { + return static_cast( + (Params.MaybePrefixParam ? Params.MaybePrefixParam->size() : 0) + + (BinaryContents.size() * 2 - 1) + + (Params.MaybeSuffixParam ? 
Params.MaybeSuffixParam->size() : 0)); + } + }(); + std::unique_ptr InitListTokens(new Token[InitListTokensSize]()); + + if (BinaryContents.empty()) { + if (Params.MaybeIfEmptyParam) { + std::copy(Params.MaybeIfEmptyParam->begin(), + Params.MaybeIfEmptyParam->end(), InitListTokens.get()); + TokenIndex += Params.MaybeIfEmptyParam->size(); + assert(TokenIndex == InitListTokensSize); + EnterTokenStream(std::move(InitListTokens), InitListTokensSize, true, + true); + } + return; + } + + // FIXME: this does not take the target's byte size into account; + // will fail on many DSPs and embedded machines! + if (Params.MaybePrefixParam) { + std::copy(Params.MaybePrefixParam->begin(), Params.MaybePrefixParam->end(), + InitListTokens.get() + TokenIndex); + TokenIndex += Params.MaybePrefixParam->size(); + } + for (size_t I = 0; I < BinaryContents.size(); ++I) { + unsigned char ByteValue = BinaryContents[I]; + StringRef ByteRepresentation = IntegerLiterals[ByteValue]; + const size_t InitListIndex = TokenIndex; + Token &IntToken = InitListTokens[InitListIndex]; + IntToken.setKind(tok::numeric_constant); + IntToken.setLiteralData(ByteRepresentation.data()); + IntToken.setLength(ByteRepresentation.size()); + IntToken.setLocation(FilenameLoc); + ++TokenIndex; + bool AtEndOfContents = I == (BinaryContents.size() - 1); + if (!AtEndOfContents) { + const size_t CommaInitListIndex = InitListIndex + 1; + Token &CommaToken = InitListTokens[CommaInitListIndex]; + CommaToken.setKind(tok::comma); + CommaToken.setLocation(FilenameLoc); + ++TokenIndex; + } + } + if (Params.MaybeSuffixParam) { + std::copy(Params.MaybeSuffixParam->begin(), Params.MaybeSuffixParam->end(), + InitListTokens.get() + TokenIndex); + TokenIndex += Params.MaybeSuffixParam->size(); + } + assert(TokenIndex == InitListTokensSize); + EnterTokenStream(std::move(InitListTokens), InitListTokensSize, true, false); +} + +void Preprocessor::HandleEmbedDirectiveBuiltin(SourceLocation FilenameLoc, + LexEmbedParametersResult &Params, 
+                                               StringRef BinaryContents,
+                                               const size_t TargetCharWidth) {
+  // TODO: implement direct built-in support
+  HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents,
+                            TargetCharWidth);
+}
+
+void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok,
+                                        const FileEntry *LookupFromFile) {
+  // #embed is standard only in C23/C++26; warn otherwise. NOTE(review):
+  // assumes warn_cxx26_* is the C++ wording; confirm in DiagnosticLexKinds.td.
+  if (LangOpts.CPlusPlus ? !LangOpts.CPlusPlus26 : !LangOpts.C23)
+    Diag(EmbedTok, LangOpts.CPlusPlus ? diag::warn_cxx26_pp_embed
+                                      : diag::warn_c23_pp_embed);
+
+  // Parse the filename header
+  Token FilenameTok;
+  if (LexHeaderName(FilenameTok))
+    return;
+
+  if (FilenameTok.isNot(tok::header_name)) {
+    Diag(FilenameTok.getLocation(), diag::err_pp_expects_filename);
+    if (FilenameTok.isNot(tok::eod))
+      DiscardUntilEndOfDirective();
+    return;
+  }
+
+  // Parse the optional sequence of
+  // directive-parameters:
+  //     identifier parameter-name-list[opt] directive-argument-list[opt]
+  // directive-argument-list:
+  //    '(' balanced-token-sequence ')'
+  // parameter-name-list:
+  //    '::' identifier parameter-name-list[opt]
+  Token CurTok;
+  LexEmbedParametersResult Params = LexEmbedParameters(
+      CurTok, /*InHasEmbed=*/false, /*DiagnoseUnknown=*/true);
+
+  // Now, splat the data out!
+  SmallString<128> FilenameBuffer;
+  SmallString<512> SearchPath;
+  SmallString<512> RelativePath;
+  StringRef Filename = getSpelling(FilenameTok, FilenameBuffer);
+  SourceLocation FilenameLoc = FilenameTok.getLocation();
+  StringRef OriginalFilename = Filename;
+  bool isAngled =
+      GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename);
+  // If GetIncludeFilenameSpelling set the start ptr to null, there was an
+  // error.
+  if (Filename.empty())
+    return;
+  OptionalFileEntryRef MaybeFileRef =
+      this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false,
+                            LookupFromFile, &SearchPath, &RelativePath);
+  if (!MaybeFileRef) {
+    // could not find file
+    if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) {
+      return;
+    }
+    Diag(FilenameTok, diag::err_pp_file_not_found) << Filename;
+    return;
+  }
+  std::optional MaybeSignedLimit{};
+  if (Params.MaybeLimitParam) {
+    if (static_cast(INT64_MAX) >= *Params.MaybeLimitParam) {
+      MaybeSignedLimit = static_cast(*Params.MaybeLimitParam);
+    }
+  }
+  llvm::ErrorOr> MaybeFile = getFileManager().getBufferForFile(
+      *MaybeFileRef, false, false, MaybeSignedLimit);
+  if (!MaybeFile) {
+    // the file was found, but no buffer could be obtained for its contents
+    Diag(FilenameTok, diag::err_cannot_open_file)
+        << Filename << "a buffer to the contents could not be created";
+    return;
+  }
+  StringRef BinaryContents = MaybeFile.get()->getBuffer();
+  if (Params.MaybeOffsetParam) {
+    // offsets all the way to the end of the file make for an empty file.
+ const size_t OffsetParam = *Params.MaybeOffsetParam; + BinaryContents = BinaryContents.substr(OffsetParam); + } + const size_t TargetCharWidth = getTargetInfo().getCharWidth(); + if (TargetCharWidth > 64) { + // Too wide for us to handle + Diag(EmbedTok, diag::err_pp_unsupported_directive) + << 1 + << "CHAR_BIT is too wide for the target architecture to handle " + "properly"; + return; + } + if (TargetCharWidth != 8) { + Diag(EmbedTok, diag::err_pp_unsupported_directive) + << 1 + << "At the moment, we do not have the machinery to support non 8-bit " + "CHAR_BIT targets!"; + return; + } + if (CHAR_BIT % TargetCharWidth != 0) { + Diag(EmbedTok, diag::err_pp_unsupported_directive) + << 1 + << "CHAR_BIT is not evenly divisible by host architecture's byte " + "definition"; + return; + } + if (Callbacks) { + CharSourceRange FilenameSourceRange( + SourceRange(FilenameTok.getLocation(), FilenameTok.getEndLoc()), true); + CharSourceRange ParametersRange(SourceRange(Params.StartLoc, Params.EndLoc), + true); + Callbacks->EmbedDirective(HashLoc, Filename, isAngled, FilenameSourceRange, + ParametersRange, MaybeFileRef, SearchPath, + RelativePath); + } + if (PPOpts->NoBuiltinPPEmbed) { + HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents, + TargetCharWidth); + } else { + // emit a token directly, handle it internally. + HandleEmbedDirectiveBuiltin(FilenameLoc, Params, BinaryContents, + TargetCharWidth); + } +} diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp index 269984aae07bf2..dda5717afc699d 100644 --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -868,7 +868,9 @@ static bool EvaluateDirectiveSubExpr(PPValue &LHS, unsigned MinPrec, /// may occur after a #if or #elif directive. If the expression is equivalent /// to "!defined(X)" return X in IfNDefMacro. 
Preprocessor::DirectiveEvalResult -Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { +Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, + Token &Tok, bool CheckForEoD, + bool Parenthesized) { SaveAndRestore PPDir(ParsingIfOrElifDirective, true); // Save the current state of 'DisableMacroExpansion' and reset it to false. If // 'DisableMacroExpansion' is true, then we must be in a macro argument list @@ -880,7 +882,6 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { DisableMacroExpansion = false; // Peek ahead one token. - Token Tok; LexNonComment(Tok); // C99 6.10.1p3 - All expressions are evaluated as intmax_t or uintmax_t. @@ -901,7 +902,8 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // We cannot trust the source range from the value because there was a // parse error. Track the range manually -- the end of the directive is the // end of the condition range. - return {false, + return {std::nullopt, + false, DT.IncludedUndefinedIds, {ExprStartLoc, ConditionRange.getEnd()}}; } @@ -917,7 +919,10 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // Restore 'DisableMacroExpansion'. DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; - return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()}; + const bool IsNonZero = ResVal.Val != 0; + const SourceRange ValRange = ResVal.getRange(); + return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds, + ValRange}; } // Otherwise, we must have a binary operator (e.g. "#if 1 < 2"), so parse the @@ -930,17 +935,34 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro) { // Restore 'DisableMacroExpansion'. 
DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; - return {false, DT.IncludedUndefinedIds, ResVal.getRange()}; + const bool IsNonZero = ResVal.Val != 0; + const SourceRange ValRange = ResVal.getRange(); + return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds, + ValRange}; } - // If we aren't at the tok::eod token, something bad happened, like an extra - // ')' token. - if (Tok.isNot(tok::eod)) { - Diag(Tok, diag::err_pp_expected_eol); - DiscardUntilEndOfDirective(); + if (CheckForEoD) { + // If we aren't at the tok::eod token, something bad happened, like an extra + // ')' token. + if (Tok.isNot(tok::eod)) { + Diag(Tok, diag::err_pp_expected_eol); + DiscardUntilEndOfDirective(); + } } // Restore 'DisableMacroExpansion'. DisableMacroExpansion = DisableMacroExpansionAtStartOfDirective; - return {ResVal.Val != 0, DT.IncludedUndefinedIds, ResVal.getRange()}; + const bool IsNonZero = ResVal.Val != 0; + const SourceRange ValRange = ResVal.getRange(); + return {std::move(ResVal.Val), IsNonZero, DT.IncludedUndefinedIds, ValRange}; +} + +/// EvaluateDirectiveExpression - Evaluate an integer constant expression that +/// may occur after a #if or #elif directive. If the expression is equivalent +/// to "!defined(X)" return X in IfNDefMacro. 
+Preprocessor::DirectiveEvalResult Preprocessor::EvaluateDirectiveExpression(
+    IdentifierInfo *&IfNDefMacro, bool CheckForEoD, bool Parenthesized) {
+  Token Tok;
+  return EvaluateDirectiveExpression(IfNDefMacro, Tok, CheckForEoD,
+                                     Parenthesized);
 }
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
index b371f8cf7a9c07..6e0163ccc89b7f 100644
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -380,6 +380,7 @@ void Preprocessor::RegisterBuiltinMacros() {
     Ident__has_c_attribute = nullptr;
 
   Ident__has_declspec = RegisterBuiltinMacro(*this, "__has_declspec_attribute");
+  Ident__has_embed = RegisterBuiltinMacro(*this, "__has_embed");
   Ident__has_include = RegisterBuiltinMacro(*this, "__has_include");
   Ident__has_include_next = RegisterBuiltinMacro(*this, "__has_include_next");
   Ident__has_warning = RegisterBuiltinMacro(*this, "__has_warning");
@@ -1264,6 +1265,114 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II,
   return File.has_value();
 }
 
+/// EvaluateHasEmbed - Process a '__has_embed("foo" params...)' expression.
+/// Returns one of the VALUE__STDC_EMBED_* codes: not found, found or empty.
+int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) {
+  // __has_embed is standard only in C23/C++26; warn otherwise. NOTE(review):
+  // assumes warn_cxx26_* is the C++ wording; confirm in DiagnosticLexKinds.td.
+  if (LangOpts.CPlusPlus ? !LangOpts.CPlusPlus26 : !LangOpts.C23) {
+    Diag(Tok, LangOpts.CPlusPlus ? diag::warn_cxx26_pp_has_embed
+                                 : diag::warn_c23_pp_has_embed);
+  }
+
+  // Save the location of the current token. If a '(' is later found, use
+  // that location. If not, use the end of this location instead.
+  SourceLocation LParenLoc = Tok.getLocation();
+
+  // These expressions are only allowed within a preprocessor directive.
+  if (!this->isParsingIfOrElifDirective()) {
+    Diag(LParenLoc, diag::err_pp_directive_required) << II;
+    // Return a valid identifier token.
+ assert(Tok.is(tok::identifier)); + Tok.setIdentifierInfo(II); + return VALUE__STDC_EMBED_NOT_FOUND__; + } + + // Get '('. If we don't have a '(', try to form a header-name token. + do { + if (this->LexHeaderName(Tok)) { + return VALUE__STDC_EMBED_NOT_FOUND__; + } + } while (Tok.getKind() == tok::comment); + + // Ensure we have a '('. + if (Tok.isNot(tok::l_paren)) { + // No '(', use end of last token. + LParenLoc = this->getLocForEndOfToken(LParenLoc); + this->Diag(LParenLoc, diag::err_pp_expected_after) << II << tok::l_paren; + // If the next token looks like a filename or the start of one, + // assume it is and process it as such. + if (Tok.isNot(tok::header_name)) { + return VALUE__STDC_EMBED_NOT_FOUND__; + } + } else { + // Save '(' location for possible missing ')' message. + LParenLoc = Tok.getLocation(); + if (this->LexHeaderName(Tok)) { + return VALUE__STDC_EMBED_NOT_FOUND__; + } + } + + if (Tok.isNot(tok::header_name)) { + Diag(Tok.getLocation(), diag::err_pp_expects_filename); + return VALUE__STDC_EMBED_NOT_FOUND__; + } + + SourceLocation FilenameLoc = Tok.getLocation(); + Token FilenameTok = Tok; + + Preprocessor::LexEmbedParametersResult Params = this->LexEmbedParameters(Tok, true, false); + if (!Params.Successful) { + if (Tok.isNot(tok::eod)) + this->DiscardUntilEndOfDirective(); + return VALUE__STDC_EMBED_NOT_FOUND__; + } + if (Params.UnrecognizedParams > 0) { + return VALUE__STDC_EMBED_NOT_FOUND__; + } + + if (!Tok.is(tok::r_paren)) { + Diag(this->getLocForEndOfToken(FilenameLoc), diag::err_pp_expected_after) + << II << tok::r_paren; + Diag(LParenLoc, diag::note_matching) << tok::l_paren; + DiscardUntilEndOfDirective(); + return VALUE__STDC_EMBED_NOT_FOUND__; + } + + + SmallString<128> FilenameBuffer; + SmallString<256> RelativePath; + StringRef Filename = this->getSpelling(FilenameTok, FilenameBuffer); + StringRef OriginalFilename = Filename; + bool isAngled = + this->GetIncludeFilenameSpelling(FilenameTok.getLocation(), Filename); + // If 
GetIncludeFilenameSpelling set the start ptr to null, there was an + // error. + assert(!Filename.empty()); + const FileEntry *LookupFromFile = + this->getCurrentFileLexer() ? this->getCurrentFileLexer()->getFileEntry() + : nullptr; + OptionalFileEntryRef MaybeFileEntry = + this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false, + LookupFromFile, nullptr, + &RelativePath); + if (Callbacks) { + Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry); + } + if (!MaybeFileEntry) { + return VALUE__STDC_EMBED_NOT_FOUND__; + } + size_t FileSize = MaybeFileEntry->getSize(); + if (FileSize == 0 || + (Params.MaybeLimitParam ? *Params.MaybeLimitParam == 0 : false)) { + return VALUE__STDC_EMBED_EMPTY__; + } + if (Params.MaybeOffsetParam && *Params.MaybeOffsetParam >= FileSize) { + return VALUE__STDC_EMBED_EMPTY__; + } + return VALUE__STDC_EMBED_FOUND__; +} + bool Preprocessor::EvaluateHasInclude(Token &Tok, IdentifierInfo *II) { return EvaluateHasIncludeCommon(Tok, II, *this, nullptr, nullptr); } @@ -1801,6 +1910,17 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { return; OS << (int)Value; Tok.setKind(tok::numeric_constant); + } else if (II == Ident__has_embed) { + // The argument to these two builtins should be a parenthesized + // file name string literal using angle brackets (<>) or + // double-quotes (""), optionally followed by a series of + // arguments similar to form like attributes. + int Value = EvaluateHasEmbed(Tok, II); + + if (Tok.isNot(tok::r_paren)) + return; + OS << Value; + Tok.setKind(tok::numeric_constant); } else if (II == Ident__has_warning) { // The argument should be a parenthesized string literal. 
EvaluateFeatureLikeBuiltinMacro(OS, Tok, II, *this, false, diff --git a/clang/test/Preprocessor/Inputs/jk.txt b/clang/test/Preprocessor/Inputs/jk.txt new file mode 100644 index 00000000000000..93d177a48c83ab --- /dev/null +++ b/clang/test/Preprocessor/Inputs/jk.txt @@ -0,0 +1 @@ +jk \ No newline at end of file diff --git a/clang/test/Preprocessor/Inputs/media/art.txt b/clang/test/Preprocessor/Inputs/media/art.txt new file mode 100644 index 00000000000000..1ce9ab967e4a15 --- /dev/null +++ b/clang/test/Preprocessor/Inputs/media/art.txt @@ -0,0 +1,9 @@ + __ _ + .-.' `; `-._ __ _ + (_, .-:' `; `-._ + ,'o"( (_, ) + (__,-' ,'o"( )> + ( (__,-' ) + `-'._.--._( ) + ||| |||`-'._.--._.-' + ||| ||| diff --git a/clang/test/Preprocessor/Inputs/media/empty b/clang/test/Preprocessor/Inputs/media/empty new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/clang/test/Preprocessor/Inputs/single_byte.txt b/clang/test/Preprocessor/Inputs/single_byte.txt new file mode 100644 index 00000000000000..63d8dbd40c2354 --- /dev/null +++ b/clang/test/Preprocessor/Inputs/single_byte.txt @@ -0,0 +1 @@ +b \ No newline at end of file diff --git a/clang/test/Preprocessor/embed___has_embed.c b/clang/test/Preprocessor/embed___has_embed.c new file mode 100644 index 00000000000000..80980e753614a5 --- /dev/null +++ b/clang/test/Preprocessor/embed___has_embed.c @@ -0,0 +1,34 @@ +// RUN: %clang_cc1 %s -E -embed-dir=%S/Inputs -CC -verify + +#if !__has_embed(__FILE__) +#error 1 +#elif !__has_embed("media/art.txt") +#error 2 +#elif __has_embed("asdkasdjkadsjkdsfjk") +#error 3 +#elif __has_embed("asdkasdjkadsjkdsfjk" limit(1)) +#error 4 +#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) limit(1)) +#error 5 +#elif __has_embed("asdkasdjkadsjkdsfjk" suffix(x) djsakdasjd::xmeow("xD")) +#error 6 +#elif !__has_embed(__FILE__ limit(2) prefix(y)) +#error 7 +#elif !__has_embed(__FILE__ limit(2)) +#error 8 +#elif __has_embed(__FILE__ dajwdwdjdahwk::meow(x)) +#error 9 +#elif __has_embed() != 2 +#error 10 
+#elif __has_embed( limit(0)) != 2 +#error 11 +#elif __has_embed( limit(0)) != 2 +#error 12 +#elif __has_embed( limit(1) clang::offset(1)) != 2 +#error 13 +#elif !__has_embed() +#error 14 +#elif !__has_embed( if_empty(meow)) +#error 14 +#endif +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed___has_embed_supported.c b/clang/test/Preprocessor/embed___has_embed_supported.c new file mode 100644 index 00000000000000..fe0edb00e60983 --- /dev/null +++ b/clang/test/Preprocessor/embed___has_embed_supported.c @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 %s -E -CC -verify + +#if !__has_embed(__FILE__) +#error 1 +#elif !__has_embed(__FILE__) +#error 2 +#elif !__has_embed(__FILE__ suffix(x)) +#error 3 +#elif !__has_embed(__FILE__ suffix(x) limit(1)) +#error 4 +#elif !__has_embed(__FILE__ suffix(x) limit(1) prefix(1)) +#error 5 +#elif !__has_embed(__FILE__ suffix(x) limit(2) prefix(1) clang::offset(1)) +#error 6 +#elif !__has_embed(__FILE__ suffix(x) limit(0) prefix(1)) +#error 7 +#elif __has_embed(__FILE__ suffix(x) limit(1) prefix(1) clang::offset(1)) != 2 +#error 8 +#elif __has_embed(__FILE__ suffix(x) limit(0)) != 2 +#error 9 +#elif __has_embed(__FILE__ suffix(x) limit(0) if_empty(:3)) != 2 +#error 10 +#endif +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_feature_test.cpp b/clang/test/Preprocessor/embed_feature_test.cpp new file mode 100644 index 00000000000000..46787041ca23be --- /dev/null +++ b/clang/test/Preprocessor/embed_feature_test.cpp @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 %s -E -CC -verify +// RUN: %clang_cc1 -x c %s -E -CC -verify + +#if defined(__cplusplus) +#if !defined(__cpp_pp_embed) || __cpp_pp_embed != 202403L +#error 1 +#endif +#endif + +#if !defined(__has_embed) +#error 2 +#endif +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_file_not_found.c b/clang/test/Preprocessor/embed_file_not_found.c new file mode 100644 index 00000000000000..337fa4ac067ec7 --- /dev/null +++ 
b/clang/test/Preprocessor/embed_file_not_found.c @@ -0,0 +1,4 @@ +// RUN: %clang_cc1 %s -E -CC -verify + +#embed +// expected-error@-1 {{'nfejfNejAKFe' file not found}} diff --git a/clang/test/Preprocessor/embed_init.c b/clang/test/Preprocessor/embed_init.c new file mode 100644 index 00000000000000..cd517b7f216ac3 --- /dev/null +++ b/clang/test/Preprocessor/embed_init.c @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify + +typedef struct kitty { + int purr; +} kitty; + +typedef struct kitty_kitty { + int here; + kitty kit; +} kitty_kitty; + +const int meow = +#embed +; + +const kitty kit = { +#embed +}; + +const kitty_kitty kit_kit = { +#embed +}; + +_Static_assert(meow == 'b', ""); +_Static_assert(kit.purr == 'b', ""); +_Static_assert(kit_kit.here == 'j', ""); +_Static_assert(kit_kit.kit.purr == 'k', ""); +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_parameter_if_empty.c b/clang/test/Preprocessor/embed_parameter_if_empty.c new file mode 100644 index 00000000000000..ac1a768b27ffff --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_if_empty.c @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify + +const char data[] = { +#embed if_empty(123, 124, 125) +}; +const char non_empty_data[] = { +#embed if_empty(123, 124, 125) +}; +_Static_assert(sizeof(data) == 3, ""); +_Static_assert(123 == data[0], ""); +_Static_assert(124 == data[1], ""); +_Static_assert(125 == data[2], ""); +_Static_assert(sizeof(non_empty_data) == 2, ""); +_Static_assert('j' == non_empty_data[0], ""); +_Static_assert('k' == non_empty_data[1], ""); +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_parameter_limit.c b/clang/test/Preprocessor/embed_parameter_limit.c new file mode 100644 index 00000000000000..28a94fe9430f03 --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_limit.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify + +const char 
data[] = { +#embed +}; +const char offset_data[] = { +#embed limit(1) +}; +_Static_assert(sizeof(data) == 2, ""); +_Static_assert('j' == data[0], ""); +_Static_assert('k' == data[1], ""); +_Static_assert(sizeof(offset_data) == 1, ""); +_Static_assert('j' == offset_data[0], ""); +_Static_assert(offset_data[0] == data[0], ""); +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_parameter_offset.c b/clang/test/Preprocessor/embed_parameter_offset.c new file mode 100644 index 00000000000000..71a029544dca55 --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_offset.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify + +const char data[] = { +#embed +}; +const char offset_data[] = { +#embed clang::offset(1) +}; +_Static_assert(sizeof(data) == 2, ""); +_Static_assert('j' == data[0], ""); +_Static_assert('k' == data[1], ""); +_Static_assert(sizeof(offset_data) == 1, ""); +_Static_assert('k' == offset_data[0], ""); +_Static_assert(offset_data[0] == data[1], ""); +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_parameter_prefix.c b/clang/test/Preprocessor/embed_parameter_prefix.c new file mode 100644 index 00000000000000..5182a2b874d399 --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_prefix.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 %s -embed-dir=%S/Inputs -fsyntax-only -verify + +const char data[] = { +#embed prefix('\xA', ) +}; +const char empty_data[] = { +#embed prefix('\xA', ) +1 +}; +_Static_assert(sizeof(data) == 2, ""); +_Static_assert('\xA' == data[0], ""); +_Static_assert('b' == data[1], ""); +_Static_assert(sizeof(empty_data) == 1, ""); +_Static_assert(1 == empty_data[0], ""); +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_parameter_suffix.c b/clang/test/Preprocessor/embed_parameter_suffix.c new file mode 100644 index 00000000000000..11c3f2bbbfb2bb --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_suffix.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 
%s -embed-dir=%S/Inputs -fsyntax-only -verify + +const char data[] = { +#embed suffix(, '\xA') +}; +const char empty_data[] = { +#embed suffix(, '\xA') +1 +}; +_Static_assert(sizeof(data) == 2, ""); +_Static_assert('b' == data[0], ""); +_Static_assert('\xA' == data[1], ""); +_Static_assert(sizeof(empty_data) == 1, ""); +_Static_assert(1 == empty_data[0], ""); +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_parameter_unrecognized.c b/clang/test/Preprocessor/embed_parameter_unrecognized.c new file mode 100644 index 00000000000000..1f043ccd2ff54b --- /dev/null +++ b/clang/test/Preprocessor/embed_parameter_unrecognized.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 %s -E -CC -verify + +#embed __FILE__ unrecognized +// expected-warning@-1 {{unknown embed preprocessor parameter 'unrecognized' ignored}} +#embed __FILE__ unrecognized::param +// expected-warning@-1 {{unknown embed preprocessor parameter 'unrecognized::param' ignored}} +#embed __FILE__ unrecognized::param(with, args) +// expected-warning@-1 {{unknown embed preprocessor parameter 'unrecognized::param' ignored}} diff --git a/clang/test/Preprocessor/embed_path_chevron.c b/clang/test/Preprocessor/embed_path_chevron.c new file mode 100644 index 00000000000000..5c33871c0c8a4d --- /dev/null +++ b/clang/test/Preprocessor/embed_path_chevron.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify + +const char data[] = { +#embed +}; +_Static_assert(sizeof(data) == 1, ""); +_Static_assert('b' == data[0], ""); +// expected-no-diagnostics diff --git a/clang/test/Preprocessor/embed_path_quote.c b/clang/test/Preprocessor/embed_path_quote.c new file mode 100644 index 00000000000000..791cd9176ebe0a --- /dev/null +++ b/clang/test/Preprocessor/embed_path_quote.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 %s -fsyntax-only -embed-dir=%S/Inputs -CC -verify + +const char data[] = { +#embed "single_byte.txt" +}; +_Static_assert(sizeof(data) == 1, ""); +_Static_assert('a' == data[0], ""); +// 
expected-no-diagnostics diff --git a/clang/test/Preprocessor/single_byte.txt b/clang/test/Preprocessor/single_byte.txt new file mode 100644 index 00000000000000..2e65efe2a145dd --- /dev/null +++ b/clang/test/Preprocessor/single_byte.txt @@ -0,0 +1 @@ +a \ No newline at end of file diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 103c08ffbe83b3..8f9d7c77ccd150 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -777,6 +777,13 @@ if(NOT DEFINED LLVM_DYLIB_COMPONENTS) "Semicolon-separated list of components to include in libLLVM, or \"all\".") endif() +option(LLVM_ENABLE_MSSTL_SECURE_WARNINGS "Turn on security warnings for use specific functions in Microsoft's STL." ON) +# Quiet down MSVC-style secure CRT warnings +if(NOT LLVM_ENABLE_MSSTL_SECURE_WARNINGS) + add_compile_definitions(_CRT_SECURE_NO_WARNINGS=1 _CRT_NONSTDC_NO_WARNINGS=1) +endif() + + if(MSVC) option(LLVM_BUILD_LLVM_C_DYLIB "Build LLVM-C.dll (Windows only)" ON) # Set this variable to OFF here so it can't be set with a command-line diff --git a/llvm/cmake/modules/GetHostTriple.cmake b/llvm/cmake/modules/GetHostTriple.cmake index 1be13bc01ab9b2..828227f2f25a2f 100644 --- a/llvm/cmake/modules/GetHostTriple.cmake +++ b/llvm/cmake/modules/GetHostTriple.cmake @@ -2,7 +2,7 @@ # Invokes config.guess function( get_host_triple var ) - if( MSVC ) + if( MSVC OR (CMAKE_HOST_SYSTEM_NAME STREQUAL Windows AND CMAKE_CXX_COMPILER_ID MATCHES "Clang") ) if( CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "ARM64.*" ) set( value "aarch64-pc-windows-msvc" ) elseif( CMAKE_C_COMPILER_ARCHITECTURE_ID MATCHES "ARM.*" ) @@ -41,7 +41,7 @@ function( get_host_triple var ) else() set( value "powerpc-ibm-aix" ) endif() - else( MSVC ) + else() if(CMAKE_HOST_SYSTEM_NAME STREQUAL Windows AND NOT MSYS) message(WARNING "unable to determine host target triple") else() @@ -55,6 +55,6 @@ function( get_host_triple var ) endif( NOT TT_RV EQUAL 0 ) set( value ${TT_OUT} ) endif() - endif( MSVC ) + endif() set( ${var} ${value} 
PARENT_SCOPE ) endfunction( get_host_triple var ) From 6a7a4c959f1635f5c3549010d277b5834a3e3fe2 Mon Sep 17 00:00:00 2001 From: ThePhD Date: Sun, 8 Oct 2023 17:43:51 -0400 Subject: [PATCH 002/113] =?UTF-8?q?=E2=9C=A8=20Speedy=20#embed=20implement?= =?UTF-8?q?ation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ⚡ [Lex] Better reservations for improved performance/memory usage. 🛠 [Lex, Frontend] Remove comma hardcoding since we are servicing a full file apply suggestions from git-clang-format --- clang/include/clang/AST/Expr.h | 51 ++ clang/include/clang/AST/RecursiveASTVisitor.h | 1 + .../clang/Basic/DiagnosticCommonKinds.td | 6 + clang/include/clang/Basic/FileManager.h | 5 +- clang/include/clang/Basic/StmtNodes.td | 1 + clang/include/clang/Basic/TokenKinds.def | 6 +- .../Frontend/PreprocessorOutputOptions.h | 3 +- .../include/clang/Lex/PPDirectiveParameter.h | 32 ++ clang/include/clang/Lex/PPEmbedParameters.h | 78 ++++ clang/include/clang/Lex/Preprocessor.h | 42 +- clang/include/clang/Sema/Sema.h | 37 ++ .../include/clang/Serialization/ASTBitCodes.h | 3 + clang/lib/AST/Expr.cpp | 16 + clang/lib/AST/ExprClassification.cpp | 5 + clang/lib/AST/ExprConstant.cpp | 8 + clang/lib/AST/ItaniumMangle.cpp | 1 + clang/lib/AST/StmtPrinter.cpp | 7 + clang/lib/AST/StmtProfile.cpp | 2 + clang/lib/Basic/FileManager.cpp | 1 - clang/lib/Basic/IdentifierTable.cpp | 6 +- clang/lib/Driver/ToolChains/Clang.cpp | 3 +- clang/lib/Format/TokenAnnotator.cpp | 3 +- clang/lib/Frontend/DependencyFile.cpp | 15 +- clang/lib/Frontend/DependencyGraph.cpp | 2 +- .../lib/Frontend/PrintPreprocessedOutput.cpp | 14 +- clang/lib/Interpreter/Interpreter.cpp | 1 + clang/lib/Lex/Lexer.cpp | 8 + clang/lib/Lex/PPDirectives.cpp | 434 ++++++++++++++---- clang/lib/Lex/PPMacroExpansion.cpp | 23 +- clang/lib/Lex/Preprocessor.cpp | 6 +- clang/lib/Parse/ParseExpr.cpp | 104 +++++ clang/lib/Parse/ParseTemplate.cpp | 2 + clang/lib/Sema/SemaDecl.cpp | 48 ++ 
clang/lib/Sema/SemaDeclCXX.cpp | 3 +- clang/lib/Sema/SemaExceptionSpec.cpp | 1 + clang/lib/Sema/SemaExpr.cpp | 239 +++++++++- clang/lib/Sema/SemaTemplate.cpp | 56 +++ clang/lib/Sema/TreeTransform.h | 6 + clang/lib/Serialization/ASTReaderStmt.cpp | 13 + clang/lib/Serialization/ASTWriterStmt.cpp | 10 + clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 4 + clang/test/Preprocessor/embed_art.c | 106 +++++ clang/test/Preprocessor/embed_single_entity.c | 7 + clang/test/Preprocessor/embed_weird.cpp | 68 +++ llvm/include/llvm/Support/Base64.h | 36 +- 45 files changed, 1351 insertions(+), 172 deletions(-) create mode 100644 clang/include/clang/Lex/PPDirectiveParameter.h create mode 100644 clang/include/clang/Lex/PPEmbedParameters.h create mode 100644 clang/test/Preprocessor/embed_art.c create mode 100644 clang/test/Preprocessor/embed_single_entity.c create mode 100644 clang/test/Preprocessor/embed_weird.cpp diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index b69c616b009036..d3fba205c91c93 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -4805,6 +4805,57 @@ class SourceLocExpr final : public Expr { friend class ASTStmtReader; }; +/// Represents a function call to __builtin_pp_embed(). +class PPEmbedExpr final : public Expr { + SourceLocation BuiltinLoc, RParenLoc; + DeclContext *ParentContext; + StringLiteral *Filename; + StringLiteral *BinaryData; + +public: + enum Action { + NotFound, + FoundOne, + Expanded, + }; + + PPEmbedExpr(const ASTContext &Ctx, QualType ResultTy, StringLiteral *Filename, + StringLiteral *BinaryData, SourceLocation BLoc, + SourceLocation RParenLoc, DeclContext *Context); + + /// Build an empty PPEmbedExpr shell; must match the class checked by classof. + explicit PPEmbedExpr(EmptyShell Empty) : Expr(PPEmbedExprClass, Empty) {} + + /// Accessors for the stored parent DeclContext (const and non-const + /// overloads).
+ const DeclContext *getParentContext() const { return ParentContext; } + DeclContext *getParentContext() { return ParentContext; } + + SourceLocation getLocation() const { return BuiltinLoc; } + SourceLocation getBeginLoc() const { return BuiltinLoc; } + SourceLocation getEndLoc() const { return RParenLoc; } + + StringLiteral *getFilenameStringLiteral() const { return Filename; } + StringLiteral *getDataStringLiteral() const { return BinaryData; } + + size_t getDataElementCount(ASTContext &Context) const; + + child_range children() { + return child_range(child_iterator(), child_iterator()); + } + + const_child_range children() const { + return const_child_range(child_iterator(), child_iterator()); + } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == PPEmbedExprClass; + } + +private: + friend class ASTStmtReader; +}; + /// Describes an C or C++ initializer list. /// /// InitListExpr describes an initializer list, which can be used to diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 3dd23eb38eeabf..6b7211bb0a0d3f 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -2809,6 +2809,7 @@ DEF_TRAVERSE_STMT(ShuffleVectorExpr, {}) DEF_TRAVERSE_STMT(ConvertVectorExpr, {}) DEF_TRAVERSE_STMT(StmtExpr, {}) DEF_TRAVERSE_STMT(SourceLocExpr, {}) +DEF_TRAVERSE_STMT(PPEmbedExpr, {}) DEF_TRAVERSE_STMT(UnresolvedLookupExpr, { TRY_TO(TraverseNestedNameSpecifierLoc(S->getQualifierLoc())); diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td index f2df283c74829f..4df86e35eebde3 100644 --- a/clang/include/clang/Basic/DiagnosticCommonKinds.td +++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td @@ -59,6 +59,9 @@ def err_expected_string_literal : Error<"expected string literal " "'external_source_symbol' attribute|" "as argument of '%1' attribute}0">; +def
err_builtin_pp_embed_invalid_argument : Error< + "invalid argument to '__builtin_pp_embed': %0">; + def err_invalid_string_udl : Error< "string literal with user-defined suffix cannot be used here">; def err_invalid_character_udl : Error< @@ -80,6 +83,9 @@ def err_expected : Error<"expected %0">; def err_expected_either : Error<"expected %0 or %1">; def err_expected_after : Error<"expected %1 after %0">; +def err_builtin_pp_embed_invalid_location : Error< + "'__builtin_pp_embed' in invalid location: %0%select{|%2}1">; + def err_param_redefinition : Error<"redefinition of parameter %0">; def warn_method_param_redefinition : Warning<"redefinition of method parameter %0">; def warn_method_param_declaration : Warning<"redeclaration of method parameter %0">, diff --git a/clang/include/clang/Basic/FileManager.h b/clang/include/clang/Basic/FileManager.h index c757f8775b425e..cbfcb292778e5f 100644 --- a/clang/include/clang/Basic/FileManager.h +++ b/clang/include/clang/Basic/FileManager.h @@ -282,8 +282,9 @@ class FileManager : public RefCountedBase { getBufferForFile(StringRef Filename, bool isVolatile = false, bool RequiresNullTerminator = true, std::optional MaybeLimit = std::nullopt) { - return getBufferForFileImpl(Filename, /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1), isVolatile, - RequiresNullTerminator); + return getBufferForFileImpl(Filename, + /*FileSize=*/(MaybeLimit ? *MaybeLimit : -1), + isVolatile, RequiresNullTerminator); } private: diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index cec301dfca2817..e3be997dd1c86e 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -203,6 +203,7 @@ def OpaqueValueExpr : StmtNode; def TypoExpr : StmtNode; def RecoveryExpr : StmtNode; def BuiltinBitCastExpr : StmtNode; +def PPEmbedExpr : StmtNode; // Microsoft Extensions. 
def MSPropertyRefExpr : StmtNode; diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 19a66fbb073119..167bd614efe7bd 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -154,10 +154,6 @@ TOK(eod) // End of preprocessing directive (end of line inside a // directive). TOK(code_completion) // Code completion marker -// #embed speed support -TOK(builtin_embed) - - // C99 6.4.9: Comments. TOK(comment) // Comment (only in -E -C[C] mode) @@ -758,6 +754,7 @@ ALIAS("__char32_t" , char32_t , KEYCXX) KEYWORD(__builtin_bit_cast , KEYALL) KEYWORD(__builtin_available , KEYALL) KEYWORD(__builtin_sycl_unique_stable_name, KEYSYCL) +KEYWORD(__builtin_pp_embed , KEYALL) // Keywords defined by Attr.td. #ifndef KEYWORD_ATTRIBUTE @@ -993,6 +990,7 @@ ANNOTATION(repl_input_end) #undef CXX11_KEYWORD #undef KEYWORD #undef PUNCTUATOR +#undef BUILTINOK #undef TOK #undef C99_KEYWORD #undef C23_KEYWORD diff --git a/clang/include/clang/Frontend/PreprocessorOutputOptions.h b/clang/include/clang/Frontend/PreprocessorOutputOptions.h index 3e36db3f8ce46e..0bc32c65a58d2d 100644 --- a/clang/include/clang/Frontend/PreprocessorOutputOptions.h +++ b/clang/include/clang/Frontend/PreprocessorOutputOptions.h @@ -22,7 +22,8 @@ class PreprocessorOutputOptions { unsigned ShowMacroComments : 1; ///< Show comments, even in macros. unsigned ShowMacros : 1; ///< Print macro definitions. unsigned ShowIncludeDirectives : 1; ///< Print includes, imports etc. within preprocessed output. - unsigned ShowEmbedDirectives : 1; ///< Print embeds, etc. within preprocessed output. + unsigned ShowEmbedDirectives : 1; ///< Print embeds, etc. within preprocessed + ///< output. unsigned RewriteIncludes : 1; ///< Preprocess include directives only. unsigned RewriteImports : 1; ///< Include contents of transitively-imported modules. unsigned MinimizeWhitespace : 1; ///< Ignore whitespace from input. 
diff --git a/clang/include/clang/Lex/PPDirectiveParameter.h b/clang/include/clang/Lex/PPDirectiveParameter.h new file mode 100644 index 00000000000000..fc413c345adc53 --- /dev/null +++ b/clang/include/clang/Lex/PPDirectiveParameter.h @@ -0,0 +1,32 @@ +//===--- PPDirectiveParameter.h ---------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the PPDirectiveParameter class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_PPDIRECTIVEPARAMETER_H +#define LLVM_CLANG_LEX_PPDIRECTIVEPARAMETER_H + +#include "clang/Basic/SourceLocation.h" + +namespace clang { + +/// Records the Start and End source locations of a directive parameter. +class PPDirectiveParameter { +public: + SourceLocation Start; + SourceLocation End; + + PPDirectiveParameter(SourceLocation Start, SourceLocation End) + : Start(Start), End(End) {} +}; + +} // end namespace clang + +#endif diff --git a/clang/include/clang/Lex/PPEmbedParameters.h b/clang/include/clang/Lex/PPEmbedParameters.h new file mode 100644 index 00000000000000..7b76d2d573c23b --- /dev/null +++ b/clang/include/clang/Lex/PPEmbedParameters.h @@ -0,0 +1,78 @@ +//===--- PPEmbedParameters.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the parameter classes for the #embed directive.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LEX_PPEMBEDPARAMETERS_H +#define LLVM_CLANG_LEX_PPEMBEDPARAMETERS_H + +#include "clang/Lex/PPDirectiveParameter.h" +#include "clang/Lex/Token.h" +#include "llvm/ADT/SmallVector.h" + +namespace clang { + +/// Preprocessor extension embed parameter "clang::offset" +/// `clang::offset( constant-expression )` +class PPEmbedParameterOffset : public PPDirectiveParameter { +public: + size_t Offset; + + PPEmbedParameterOffset(size_t Offset, SourceLocation Start, + SourceLocation End) + : PPDirectiveParameter(Start, End), Offset(Offset) {} +}; + +/// Preprocessor standard embed parameter "limit" +/// `limit( constant-expression )` +class PPEmbedParameterLimit : public PPDirectiveParameter { +public: + size_t Limit; + + PPEmbedParameterLimit(size_t Limit, SourceLocation Start, SourceLocation End) + : PPDirectiveParameter(Start, End), Limit(Limit) {} +}; + +/// Preprocessor standard embed parameter "prefix" +/// `prefix( balanced-token-seq )` +class PPEmbedParameterPrefix : public PPDirectiveParameter { +public: + SmallVector Tokens; + + PPEmbedParameterPrefix(SmallVector Tokens, SourceLocation Start, + SourceLocation End) + : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {} +}; + +/// Preprocessor standard embed parameter "suffix" +/// `suffix( balanced-token-seq )` +class PPEmbedParameterSuffix : public PPDirectiveParameter { +public: + SmallVector Tokens; + + PPEmbedParameterSuffix(SmallVector Tokens, SourceLocation Start, + SourceLocation End) + : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens)) {} +}; + +/// Preprocessor standard embed parameter "if_empty" +/// `if_empty( balanced-token-seq )` +class PPEmbedParameterIfEmpty : public PPDirectiveParameter { +public: + SmallVector Tokens; + + PPEmbedParameterIfEmpty(SmallVector Tokens, SourceLocation Start, + SourceLocation End) + : PPDirectiveParameter(Start, End), Tokens(std::move(Tokens))
{} +}; + +} // end namespace clang + +#endif diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 7470bf5882730c..58012fb79559e2 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -29,6 +29,7 @@ #include "clang/Lex/ModuleLoader.h" #include "clang/Lex/ModuleMap.h" #include "clang/Lex/PPCallbacks.h" +#include "clang/Lex/PPEmbedParameters.h" #include "clang/Lex/Token.h" #include "clang/Lex/TokenLexer.h" #include "llvm/ADT/APSInt.h" @@ -1165,6 +1166,9 @@ class Preprocessor { void updateOutOfDateIdentifier(IdentifierInfo &II) const; + /// Buffers for used #embed directives + std::vector EmbedBuffers; + public: Preprocessor(std::shared_ptr PPOpts, DiagnosticsEngine &diags, const LangOptions &LangOpts, @@ -1735,15 +1739,15 @@ class Preprocessor { bool LexHeaderName(Token &Result, bool AllowMacroExpansion = true); struct LexEmbedParametersResult { - bool Successful; - std::optional MaybeLimitParam; - std::optional MaybeOffsetParam; - std::optional> MaybeIfEmptyParam; - std::optional> MaybePrefixParam; - std::optional> MaybeSuffixParam; - int UnrecognizedParams; + std::optional MaybeLimitParam; + std::optional MaybeOffsetParam; + std::optional MaybeIfEmptyParam; + std::optional MaybePrefixParam; + std::optional MaybeSuffixParam; SourceLocation StartLoc; SourceLocation EndLoc; + int UnrecognizedParams; + bool Successful; }; LexEmbedParametersResult LexEmbedParameters(Token &Current, @@ -1812,7 +1816,8 @@ class Preprocessor { /// Parses a simple integer literal to get its numeric value. Floating /// point literals and user defined literals are rejected. Used primarily to /// handle pragmas that accept integer arguments. - bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value); + bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value, + bool WithLex = true); /// Disables macro expansion everywhere except for preprocessor directives.
void SetMacroExpansionOnlyInDirectives() { @@ -2441,8 +2446,7 @@ class Preprocessor { /// reference is for system \#include's or not (i.e. using <> instead of ""). OptionalFileEntryRef LookupEmbedFile(SourceLocation FilenameLoc, StringRef Filename, bool isAngled, - bool OpenFile, - const FileEntry *LookupFromFile = nullptr, + bool OpenFile, const FileEntry *LookupFromFile = nullptr, SmallVectorImpl *SearchPath = nullptr, SmallVectorImpl *RelativePath = nullptr); @@ -2735,12 +2739,18 @@ class Preprocessor { // Binary data inclusion void HandleEmbedDirective(SourceLocation HashLoc, Token &Tok, const FileEntry *LookupFromFile = nullptr); - void HandleEmbedDirectiveNaive( - SourceLocation FilenameTok, LexEmbedParametersResult &Params, - StringRef BinaryContents, const size_t TargetCharWidth); - void HandleEmbedDirectiveBuiltin( - SourceLocation FilenameTok, LexEmbedParametersResult &Params, - StringRef BinaryContents, const size_t TargetCharWidth); + void HandleEmbedDirectiveNaive(SourceLocation HashLoc, + SourceLocation FilenameTok, + const LexEmbedParametersResult &Params, + StringRef BinaryContents, + const size_t TargetCharWidth); + void HandleEmbedDirectiveBuiltin(SourceLocation HashLoc, + const Token &FilenameTok, + StringRef ResolvedFilename, + StringRef SearchPath, StringRef RelativePath, + const LexEmbedParametersResult &Params, + StringRef BinaryContents, + const size_t TargetCharWidth); // File inclusion. void HandleIncludeDirective(SourceLocation HashLoc, Token &Tok, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 2ebd21090ae4e1..d3c62d8e75650e 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -5981,6 +5981,10 @@ class Sema final { ArrayRef Arg, SourceLocation RParenLoc, Expr *Config = nullptr, bool IsExecConfig = false, ADLCallKind UsesADL = ADLCallKind::NotADL); + /// `Fn` may be a null pointer. 
+ void ModifyCallExprArguments(Expr *Fn, SourceLocation LParenLoc, + SmallVectorImpl &ArgExprs, + SourceLocation RParenLoc); ExprResult ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, MultiExprArg ExecConfig, @@ -6098,6 +6102,35 @@ class Sema final { SourceLocation BuiltinLoc, SourceLocation RPLoc); + // __builtin_pp_embed() + ExprResult ActOnPPEmbedExpr(SourceLocation BuiltinLoc, + SourceLocation Base64DataLocation, + SourceLocation RPLoc, StringLiteral *Filename, + QualType DataTy, std::vector BinaryData); + + IntegerLiteral *ExpandSinglePPEmbedExpr(PPEmbedExpr *PPEmbed); + + PPEmbedExpr::Action + CheckExprListForPPEmbedExpr(ArrayRef ExprList, + std::optional MaybeInitType); + PPEmbedExpr::Action + ExpandPPEmbedExprInExprList(ArrayRef ExprList, + SmallVectorImpl &OutputExprList, + bool ClearOutputFirst = true); + PPEmbedExpr::Action + ExpandPPEmbedExprInExprList(SmallVectorImpl &OutputList); + + enum PPEmbedExprContext { + PPEEC__StaticAssert, + PPEEC_StaticAssert, + }; + + StringRef GetLocationName(PPEmbedExprContext Context) const; + + bool DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation, + PPEmbedExprContext Context, + bool SingleAllowed = true); + // Build a potentially resolved SourceLocExpr. ExprResult BuildSourceLocExpr(SourceLocExpr::IdentKind Kind, QualType ResultTy, SourceLocation BuiltinLoc, @@ -8290,6 +8323,10 @@ class Sema final { SourceLocation EqualLoc, ParsedTemplateArgument DefaultArg); + void ModifyTemplateArguments( + const TemplateTy &Template, + SmallVectorImpl &TemplateArgs); + TemplateParameterList * ActOnTemplateParameterList(unsigned Depth, SourceLocation ExportLoc, diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 5c32fbc079c9a6..138c52bc8149fc 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -1715,6 +1715,9 @@ enum StmtCode { /// A SourceLocExpr record. 
EXPR_SOURCE_LOC, + /// A PPEmbedExpr record. + EXPR_BUILTIN_PP_EMBED, + /// A ShuffleVectorExpr record. EXPR_SHUFFLE_VECTOR, diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 4bfc4f082cd6a6..f0c0359cd9feaf 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -2392,6 +2392,21 @@ APValue SourceLocExpr::EvaluateInContext(const ASTContext &Ctx, llvm_unreachable("unhandled case"); } +PPEmbedExpr::PPEmbedExpr(const ASTContext &Ctx, QualType ResultTy, + StringLiteral *Filename, StringLiteral *BinaryData, + SourceLocation BLoc, SourceLocation RParenLoc, + DeclContext *ParentContext) + : Expr(PPEmbedExprClass, ResultTy, VK_PRValue, OK_Ordinary), + BuiltinLoc(BLoc), RParenLoc(RParenLoc), ParentContext(ParentContext), + Filename(Filename), BinaryData(BinaryData) { + setDependence(ExprDependence::None); +} + +size_t PPEmbedExpr::getDataElementCount(ASTContext &Context) const { + return getDataStringLiteral()->getByteLength() / + (Context.getTypeSize(getType()) / Context.getTypeSize(Context.CharTy)); +} + InitListExpr::InitListExpr(const ASTContext &C, SourceLocation lbraceloc, ArrayRef initExprs, SourceLocation rbraceloc) : Expr(InitListExprClass, QualType(), VK_PRValue, OK_Ordinary), @@ -3610,6 +3625,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx, case CXXUuidofExprClass: case OpaqueValueExprClass: case SourceLocExprClass: + case PPEmbedExprClass: case ConceptSpecializationExprClass: case RequiresExprClass: case SYCLUniqueStableNameExprClass: diff --git a/clang/lib/AST/ExprClassification.cpp b/clang/lib/AST/ExprClassification.cpp index ffa7c6802ea6e1..fbbbd72b144571 100644 --- a/clang/lib/AST/ExprClassification.cpp +++ b/clang/lib/AST/ExprClassification.cpp @@ -204,6 +204,11 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) { case Expr::RequiresExprClass: return Cl::CL_PRValue; + case Expr::PPEmbedExprClass: + // Nominally, this just goes through as a PRValue until we actually expand + // it and check it. 
+ return Cl::CL_PRValue; + // Make HLSL this reference-like case Expr::CXXThisExprClass: return Lang.HLSL ? Cl::CL_LValue : Cl::CL_PRValue; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index e5539dedec02a4..b6967cc97d78c5 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -8921,6 +8921,11 @@ class PointerExprEvaluator return true; } + bool VisitPPEmbedExpr(const PPEmbedExpr *E) { + llvm_unreachable("Not yet implemented for ExprConstant.cpp"); + return true; + } + bool VisitSYCLUniqueStableNameExpr(const SYCLUniqueStableNameExpr *E) { std::string ResultStr = E->ComputeName(Info.Ctx); @@ -16166,6 +16171,9 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) { return ICEDiag(IK_NotICE, E->getBeginLoc()); return CheckICE(cast(E)->getSubExpr(), Ctx); } + case Expr::PPEmbedExprClass: { + return ICEDiag(IK_ICE, E->getBeginLoc()); + } } llvm_unreachable("Invalid StmtClass!"); diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 23ec35cae4b7b4..f08fb766efd777 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -4721,6 +4721,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity, case Expr::PseudoObjectExprClass: case Expr::AtomicExprClass: case Expr::SourceLocExprClass: + case Expr::PPEmbedExprClass: case Expr::BuiltinBitCastExprClass: { NotPrimaryExpr(); diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index a31aa0cfeeed8d..f94386be778847 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -49,6 +49,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Base64.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" @@ -1145,6 +1146,12 @@ void StmtPrinter::VisitSourceLocExpr(SourceLocExpr *Node) { OS << Node->getBuiltinStr() << 
"()"; } +void StmtPrinter::VisitPPEmbedExpr(PPEmbedExpr *Node) { + OS << "__builtin_pp_embed(" << Node->getType() << ", " + << Node->getFilenameStringLiteral()->getBytes() << ", \"" + << llvm::encodeBase64(Node->getDataStringLiteral()->getBytes()) << "\")"; +} + void StmtPrinter::VisitConstantExpr(ConstantExpr *Node) { PrintExpr(Node->getSubExpr()); } diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 22b6855b0fff23..0be044f54a819e 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -2284,6 +2284,8 @@ void StmtProfiler::VisitSourceLocExpr(const SourceLocExpr *E) { VisitExpr(E); } +void StmtProfiler::VisitPPEmbedExpr(const PPEmbedExpr *E) { VisitExpr(E); } + void StmtProfiler::VisitRecoveryExpr(const RecoveryExpr *E) { VisitExpr(E); } void StmtProfiler::VisitObjCStringLiteral(const ObjCStringLiteral *S) { diff --git a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp index e0e80b5e0fbedb..d8a5b56438ad33 100644 --- a/clang/lib/Basic/FileManager.cpp +++ b/clang/lib/Basic/FileManager.cpp @@ -549,7 +549,6 @@ FileManager::getBufferForFile(FileEntryRef FE, bool isVolatile, if (MaybeLimit) FileSize = *MaybeLimit; - // If there's a high enough chance that the file have changed since we // got its size, force a stat before opening it. if (isVolatile || Entry->isNamedPipe()) diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp index d2b5426d27bb3b..96ac3663ca6658 100644 --- a/clang/lib/Basic/IdentifierTable.cpp +++ b/clang/lib/Basic/IdentifierTable.cpp @@ -422,8 +422,8 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { // collisions (if there were, the switch below would complain about duplicate // case values). Note that this depends on 'if' being null terminated. 
-#define HASH(LEN, FIRST, THIRD) \ - (LEN << 6) + (((FIRST-'a') - (THIRD-'a')) & 63) +#define HASH(LEN, FIRST, THIRD) \ + (LEN << 6) + (((FIRST - 'a') - (THIRD - 'a')) & 63) #define CASE(LEN, FIRST, THIRD, NAME) \ case HASH(LEN, FIRST, THIRD): \ return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME @@ -438,7 +438,7 @@ tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const { CASE( 4, 'e', 's', else); CASE( 4, 'l', 'n', line); CASE( 4, 's', 'c', sccs); - CASE( 5, 'e', 'b', embed); + CASE(5, 'e', 'b', embed); CASE( 5, 'e', 'd', endif); CASE( 5, 'e', 'r', error); CASE( 5, 'i', 'e', ident); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index fc2f749a34fc47..53a92502b463b5 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1324,7 +1324,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, Args.addAllArgs(CmdArgs, {options::OPT_D, options::OPT_U, options::OPT_I_Group, - options::OPT_F, options::OPT_index_header_map, options::OPT_EmbedPath_Group}); + options::OPT_F, options::OPT_index_header_map, + options::OPT_EmbedPath_Group}); // Add -Wp, and -Xpreprocessor if using the preprocessor. 
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index e405a9085951dc..0a3c16f3a669c7 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1399,8 +1399,7 @@ class AnnotatingParser { if (Tok->isOneOf(Keywords.kw___has_include, Keywords.kw___has_include_next)) { parseHasInclude(); - } - else if (Tok->is(Keywords.kw___has_embed)) { + } else if (Tok->is(Keywords.kw___has_embed)) { parseHasEmbed(); } if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next && diff --git a/clang/lib/Frontend/DependencyFile.cpp b/clang/lib/Frontend/DependencyFile.cpp index 10558b1d34bf62..04ddb92ff7f7b6 100644 --- a/clang/lib/Frontend/DependencyFile.cpp +++ b/clang/lib/Frontend/DependencyFile.cpp @@ -65,11 +65,11 @@ struct DepCollectorPPCallbacks : public PPCallbacks { /*IsMissing=*/false); } - void EmbedDirective(SourceLocation HashLoc, - StringRef FileName, bool IsAngled, - CharSourceRange FilenameRange, CharSourceRange ParametersRange, - OptionalFileEntryRef File, StringRef SearchPath, - StringRef RelativePath) override { + void EmbedDirective(SourceLocation HashLoc, StringRef FileName, bool IsAngled, + CharSourceRange FilenameRange, + CharSourceRange ParametersRange, + OptionalFileEntryRef File, StringRef SearchPath, + StringRef RelativePath) override { if (!File) DepCollector.maybeAddDependency(FileName, /*FromModule*/ false, @@ -97,14 +97,13 @@ struct DepCollectorPPCallbacks : public PPCallbacks { } void HasEmbed(SourceLocation Loc, StringRef SpelledFilename, bool IsAngled, - OptionalFileEntryRef File) override { + OptionalFileEntryRef File) override { if (!File) return; StringRef Filename = llvm::sys::path::remove_leading_dotslash(File->getName()); DepCollector.maybeAddDependency(Filename, - /*FromModule=*/false, - false, + /*FromModule=*/false, false, /*IsModuleFile=*/false, &PP.getFileManager(), /*IsMissing=*/false); diff --git a/clang/lib/Frontend/DependencyGraph.cpp 
b/clang/lib/Frontend/DependencyGraph.cpp index 683f751a94244e..4049a5245de7d3 100644 --- a/clang/lib/Frontend/DependencyGraph.cpp +++ b/clang/lib/Frontend/DependencyGraph.cpp @@ -53,7 +53,7 @@ class DependencyGraphCallback : public PPCallbacks { DependencyGraphCallback(const Preprocessor *_PP, StringRef OutputFile, StringRef SysRoot, DirectiveBehavior Action = IgnoreEmbed) - : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) { } + : PP(_PP), OutputFile(OutputFile.str()), SysRoot(SysRoot.str()) {} void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp index fb9baa92e6836d..1d93ad97305da8 100644 --- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp +++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp @@ -107,9 +107,10 @@ class PrintPPOutputPPCallbacks : public PPCallbacks { public: PrintPPOutputPPCallbacks(Preprocessor &pp, raw_ostream *os, bool lineMarkers, - bool defines, bool DumpIncludeDirectives, bool DumpEmbedDirectives, - bool UseLineDirectives, bool MinimizeWhitespace, - bool DirectivesOnly, bool KeepSystemIncludes) + bool defines, bool DumpIncludeDirectives, + bool DumpEmbedDirectives, bool UseLineDirectives, + bool MinimizeWhitespace, bool DirectivesOnly, + bool KeepSystemIncludes) : PP(pp), SM(PP.getSourceManager()), ConcatInfo(PP), OS(os), DisableLineMarkers(lineMarkers), DumpDefines(defines), DumpIncludeDirectives(DumpIncludeDirectives), @@ -414,7 +415,7 @@ void PrintPPOutputPPCallbacks::EmbedDirective( if (DumpEmbedDirectives) { MoveToLine(HashLoc, /*RequireStartOfLine=*/true); *OS << "#embed " << (IsAngled ? '<' : '"') << FileName - << (IsAngled ? '>' : '"') << " /* clang -E -dE */"; + << (IsAngled ? 
'>' : '"') << " /* clang -E -dE */"; setEmittedDirectiveOnThisLine(); } } @@ -1002,8 +1003,9 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, PrintPPOutputPPCallbacks *Callbacks = new PrintPPOutputPPCallbacks( PP, OS, !Opts.ShowLineMarkers, Opts.ShowMacros, - Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives, Opts.UseLineDirectives, - Opts.MinimizeWhitespace, Opts.DirectivesOnly, Opts.KeepSystemIncludes); + Opts.ShowIncludeDirectives, Opts.ShowEmbedDirectives, + Opts.UseLineDirectives, Opts.MinimizeWhitespace, Opts.DirectivesOnly, + Opts.KeepSystemIncludes); // Expand macros in pragmas with -fms-extensions. The assumption is that // the majority of pragmas in such a file will be Microsoft pragmas. diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index 7968c62cbd3e7b..e2e55daa77b854 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -566,6 +566,7 @@ class RuntimeInterfaceBuilder CStyleCastPtrExpr(S, Ctx.VoidPtrTy, (uintptr_t)Ty.getAsOpaquePtr()); // The QualType parameter `OpaqueType`, represented as `void*`. Args.push_back(TypeArg); + S.ModifyCallExprArguments(nullptr, E->getBeginLoc(), Args, E->getEndLoc()); // We push the last parameter based on the type of the Expr. Note we need // special care for rvalue struct. diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index feed1b9ecd71a8..b55b4c360d4429 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -417,6 +417,14 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, } } + // NOTE: this is to prevent a few cases where token streams with + // commas are used to print with pseudo-locations after a faux-expansion + // cause reading a bogus location from a source file that does not exist. + if (Tok.is(tok::comma)) { + Buffer = ","; + return 1; + } + // NOTE: this can be checked even after testing for an IdentifierInfo. 
if (Tok.isLiteral()) TokStart = Tok.getLiteralData(); diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index e0d98d7ca03fa1..1696c1a40c3d46 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -42,11 +42,13 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/AlignOf.h" +#include "llvm/Support/Base64.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Path.h" #include "llvm/Support/SaveAndRestore.h" #include #include +#include #include #include #include @@ -3631,10 +3633,12 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed, SmallVector ParameterTokens; tok::TokenKind EndTokenKind = InHasEmbed ? tok::r_paren : tok::eod; Result.StartLoc = CurTok.getLocation(); + Result.EndLoc = CurTok.getLocation(); for (LexNonComment(CurTok); CurTok.isNot(EndTokenKind);) { Parameter.clear(); // Lex identifier [:: identifier ...] if (!CurTok.is(tok::identifier)) { + Result.EndLoc = CurTok.getEndLoc(); Diag(CurTok, diag::err_expected) << "identifier"; DiscardUntilEndOfDirective(); return Result; @@ -3647,6 +3651,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed, Parameter.append("::"); LexNonComment(CurTok); if (!CurTok.is(tok::identifier)) { + Result.EndLoc = CurTok.getEndLoc(); Diag(CurTok, diag::err_expected) << "identifier"; DiscardUntilEndOfDirective(); return Result; @@ -3670,25 +3675,19 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed, return Result; } const llvm::APSInt &LimitResult = *LimitEvalResult.Value; - const bool ValueDoesNotFit = - LimitResult.getBitWidth() > 64 - ? true - : (LimitResult.isUnsigned() || - (LimitResult.isSigned() && LimitResult.isNegative())); - if (ValueDoesNotFit) { + if (LimitResult.getBitWidth() > 64) { Diag(CurTok, diag::warn_pp_expr_overflow); - // just truncate and roll with that, I guess? 
- Result.MaybeLimitParam = - static_cast(LimitResult.getRawData()[0]); - } else { - Result.MaybeLimitParam = - static_cast(LimitResult.getZExtValue()); } + size_t LimitValue = 0; + LimitValue = LimitResult.getLimitedValue(); + Result.MaybeLimitParam = PPEmbedParameterLimit{ + LimitValue, ParameterStartTok.getLocation(), CurTok.getEndLoc()}; LexNonComment(CurTok); } else if (Parameter == "clang::offset") { // we have a limit parameter and its internals are processed using // evaluation rules from #if - handle here if (CurTok.isNot(tok::l_paren)) { + Result.EndLoc = CurTok.getEndLoc(); Diag(CurTok, diag::err_pp_expected_after) << "(" << Parameter; DiscardUntilEndOfDirective(); return Result; @@ -3697,18 +3696,17 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed, DirectiveEvalResult OffsetEvalResult = EvaluateDirectiveExpression(ParameterIfNDef, CurTok, false, true); if (!OffsetEvalResult.Value) { + Result.EndLoc = CurTok.getEndLoc(); return Result; } const llvm::APSInt &OffsetResult = *OffsetEvalResult.Value; + size_t OffsetValue; if (OffsetResult.getBitWidth() > 64) { Diag(CurTok, diag::warn_pp_expr_overflow); - // just truncate and roll with that, I guess? 
- Result.MaybeOffsetParam = - static_cast(OffsetResult.getRawData()[0]); - } else { - Result.MaybeOffsetParam = - static_cast(OffsetResult.getZExtValue()); } + OffsetValue = OffsetResult.getLimitedValue(); + Result.MaybeOffsetParam = PPEmbedParameterOffset{ + OffsetValue, ParameterStartTok.getLocation(), CurTok.getEndLoc()}; LexNonComment(CurTok); } else { if (CurTok.is(tok::l_paren)) { @@ -3764,6 +3762,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed, return true; }; if (!ParseArgToken()) { + Result.EndLoc = CurTok.getEndLoc(); return Result; } if (!CurTok.is(tok::r_paren)) { @@ -3775,14 +3774,17 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed, } // "Token-soup" parameters if (Parameter == "if_empty") { - // TODO: integer list optimization - Result.MaybeIfEmptyParam = std::move(ParameterTokens); + Result.MaybeIfEmptyParam = PPEmbedParameterIfEmpty{ + std::move(ParameterTokens), ParameterStartTok.getLocation(), + CurTok.getLocation()}; } else if (Parameter == "prefix") { - // TODO: integer list optimization - Result.MaybePrefixParam = std::move(ParameterTokens); + Result.MaybePrefixParam = PPEmbedParameterPrefix{ + std::move(ParameterTokens), ParameterStartTok.getLocation(), + CurTok.getLocation()}; } else if (Parameter == "suffix") { - // TODO: integer list optimization - Result.MaybeSuffixParam = std::move(ParameterTokens); + Result.MaybeSuffixParam = PPEmbedParameterSuffix{ + std::move(ParameterTokens), ParameterStartTok.getLocation(), + CurTok.getLocation()}; } else { ++Result.UnrecognizedParams; if (DiagnoseUnknown) { @@ -3793,6 +3795,7 @@ Preprocessor::LexEmbedParameters(Token &CurTok, bool InHasEmbed, } } Result.Successful = true; + Result.EndLoc = CurTok.getEndLoc(); return Result; } @@ -3823,89 +3826,327 @@ inline constexpr const char *IntegerLiterals[] = { "242", "243", "244", "245", "246", "247", "248", "249", "250", "251", "252", "253", "254", "255"}; -void Preprocessor::HandleEmbedDirectiveNaive(SourceLocation 
FilenameLoc, - LexEmbedParametersResult &Params, - StringRef BinaryContents, - const size_t TargetCharWidth) { - (void)TargetCharWidth; // for later, when we support various sizes - size_t TokenIndex = 0; - const size_t InitListTokensSize = [&]() { - if (BinaryContents.empty()) { - if (Params.MaybeIfEmptyParam) { - return Params.MaybeIfEmptyParam->size(); +static size_t +ComputeNaiveReserveSize(const Preprocessor::LexEmbedParametersResult &Params, + StringRef TypeName, StringRef BinaryContents, + SmallVectorImpl &TokSpellingBuffer) { + size_t ReserveSize = 0; + if (BinaryContents.empty()) { + if (Params.MaybeIfEmptyParam) { + for (const auto &Tok : Params.MaybeIfEmptyParam->Tokens) { + const size_t TokLen = Tok.getLength(); + if (TokLen > TokSpellingBuffer.size()) { + TokSpellingBuffer.resize(TokLen); + } + ReserveSize += TokLen; + } + } + } else { + if (Params.MaybePrefixParam) { + for (const auto &Tok : Params.MaybePrefixParam->Tokens) { + const size_t TokLen = Tok.getLength(); + if (TokLen > TokSpellingBuffer.size()) { + TokSpellingBuffer.resize(TokLen); + } + ReserveSize += TokLen; + } + } + for (const auto &Byte : BinaryContents) { + ReserveSize += 3 + TypeName.size(); // ((type-name) + if (Byte > 99) { + ReserveSize += 3; // ### + } else if (Byte > 9) { + ReserveSize += 2; // ## } else { - return static_cast(0); + ReserveSize += 1; // # } - } else { - return static_cast( - (Params.MaybePrefixParam ? Params.MaybePrefixParam->size() : 0) + - (BinaryContents.size() * 2 - 1) + - (Params.MaybeSuffixParam ? 
Params.MaybeSuffixParam->size() : 0)); + ReserveSize += 2; // ), } - }(); - std::unique_ptr InitListTokens(new Token[InitListTokensSize]()); + if (Params.MaybeSuffixParam) { // suffix(...) tokens trail the data + for (const auto &Tok : Params.MaybeSuffixParam->Tokens) { + const size_t TokLen = Tok.getLength(); + if (TokLen > TokSpellingBuffer.size()) { + TokSpellingBuffer.resize(TokLen); + } + ReserveSize += TokLen; + } + } + } + return ReserveSize; +} +void Preprocessor::HandleEmbedDirectiveNaive( + SourceLocation HashLoc, SourceLocation FilenameLoc, + const LexEmbedParametersResult &Params, StringRef BinaryContents, + const size_t TargetCharWidth) { + // Load up a new embed buffer for this file and set of parameters in + // particular. + EmbedBuffers.push_back(""); + size_t EmbedBufferNumber = EmbedBuffers.size(); + std::string EmbedBufferNumberVal = std::to_string(EmbedBufferNumber); + llvm::Twine EmbedBufferName = [](const std::string &Number) { + llvm::Twine PrefixNumber = (""); + }(EmbedBufferNumberVal); + std::string &TargetEmbedBuffer = EmbedBuffers.back(); + const size_t TotalSize = BinaryContents.size(); + // In the future, this might change/improve. 
+ const StringRef TypeName = "unsigned char"; + + SmallVector TokSpellingBuffer(32, 0); + const size_t ReserveSize = ComputeNaiveReserveSize( + Params, TypeName, BinaryContents, TokSpellingBuffer); + TargetEmbedBuffer.reserve(ReserveSize); + + // Generate the look-alike source file if (BinaryContents.empty()) { if (Params.MaybeIfEmptyParam) { - std::copy(Params.MaybeIfEmptyParam->begin(), - Params.MaybeIfEmptyParam->end(), InitListTokens.get()); - TokenIndex += Params.MaybeIfEmptyParam->size(); - assert(TokenIndex == InitListTokensSize); - EnterTokenStream(std::move(InitListTokens), InitListTokensSize, true, - true); + const PPEmbedParameterIfEmpty &EmptyParam = *Params.MaybeIfEmptyParam; + for (const auto &Tok : EmptyParam.Tokens) { + StringRef Spelling = this->getSpelling(Tok, TokSpellingBuffer); + TargetEmbedBuffer.append(Spelling.data(), Spelling.size()); + } + } + } else { + if (Params.MaybePrefixParam) { + const PPEmbedParameterPrefix &PrefixParam = *Params.MaybePrefixParam; + for (const auto &Tok : PrefixParam.Tokens) { + StringRef Spelling = this->getSpelling(Tok, TokSpellingBuffer); + TargetEmbedBuffer.append(Spelling.data(), Spelling.size()); + } + } + for (size_t I = 0; I < TotalSize; ++I) { + unsigned char ByteValue = BinaryContents[I]; + StringRef ByteRepresentation = IntegerLiterals[ByteValue]; + TargetEmbedBuffer.append(2, '('); + TargetEmbedBuffer.append(TypeName.data(), TypeName.size()); + TargetEmbedBuffer.append(1, ')'); + TargetEmbedBuffer.append(ByteRepresentation.data(), + ByteRepresentation.size()); + TargetEmbedBuffer.append(1, ')'); + bool AtEndOfContents = I == (TotalSize - 1); + if (!AtEndOfContents) { + TargetEmbedBuffer.append(1, ','); + } + } + if (Params.MaybeSuffixParam) { + const PPEmbedParameterSuffix &SuffixParam = *Params.MaybeSuffixParam; + for (const auto &Tok : SuffixParam.Tokens) { + StringRef Spelling = this->getSpelling(Tok, TokSpellingBuffer); + TargetEmbedBuffer.append(Spelling.data(), Spelling.size()); + } } - return; } 
- // FIXME: this does not take the target's byte size into account; - // will fail on many DSPs and embedded machines! + // Create faux-file and its ID, backed by a memory buffer. + std::unique_ptr EmbedMemBuffer = + llvm::MemoryBuffer::getMemBufferCopy(TargetEmbedBuffer, EmbedBufferName); + assert(EmbedMemBuffer && "Cannot create predefined source buffer"); + FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer)); + assert(EmbedBufferFID.isValid() && + "Could not create FileID for #embed directive?"); + // Start parsing the look-alike source file for the embed directive and + // pretend everything is normal + // TODO: (Maybe? )Stop the PPCallbacks from considering this a Real File™. + EnterSourceFile(EmbedBufferFID, nullptr, HashLoc, false); +} + +static bool TokenListIsCharacterArray(Preprocessor &PP, + const size_t TargetCharWidth, + bool IsPrefix, + const SmallVectorImpl &Tokens, + llvm::SmallVectorImpl &Output) { + const bool IsSuffix = !IsPrefix; + size_t MaxValue = + static_cast(std::pow((size_t)2, TargetCharWidth)) - 1u; + size_t TokenIndex = 0; + // if it's a suffix, we are expecting a comma first + // if it's a prefix, we are expecting a numeric literal first + bool ExpectingNumericLiteral = IsPrefix; + const size_t TokensSize = Tokens.size(); + if (Tokens.empty()) { + return true; + } + for (; TokenIndex < TokensSize; + (void)++TokenIndex, ExpectingNumericLiteral = !ExpectingNumericLiteral) { + const Token &Tok = Tokens[TokenIndex]; + // TODO: parse an optional, PLAIN `(unsigned char)` cast in front of the + // literals, since the Spec technically decrees each element is of type + // `unsigned char` (unless we have a potential future extension for + // `clang::type(meow)` as an embed parameter + if (ExpectingNumericLiteral) { + if (Tok.isNot(tok::numeric_constant)) { + return false; + } + uint64_t Value = {}; + Token ParsingTok = Tok; + if (!PP.parseSimpleIntegerLiteral(ParsingTok, Value, false)) { + // numeric literal is a floating 
point literal or a UDL; too complex for + // us + return false; + } + if (Value > MaxValue || Value > static_cast(0xFF)) { + // number is too large + return false; + } + Output.push_back((char)Value); + } else { + if (Tok.isNot(tok::comma)) { + return false; + } + } + } + const bool EndedOnNumber = !ExpectingNumericLiteral; + if (IsPrefix && EndedOnNumber) { + // we ended on a number: this is a failure for prefix! + return false; + } + const bool EndedOnComma = ExpectingNumericLiteral; + if (IsSuffix && EndedOnComma) { + // we ended on a comma: this is a failure for suffix! + return false; + } + // if all tokens have been consumed by the above process, then we have + // succeeded. + return TokenIndex == TokensSize; +} + +static void TripleEncodeBase64(StringRef Bytes0, StringRef Bytes1, + StringRef Bytes2, std::string &OutputBuffer) { + static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + const size_t TotalSize = Bytes0.size() + Bytes1.size() + Bytes2.size(); + const size_t Bytes0Size = Bytes0.size(); + const size_t Bytes01Size = Bytes0.size() + Bytes1.size(); + const size_t IndexOffset = OutputBuffer.size(); + OutputBuffer.resize(OutputBuffer.size() + (((TotalSize + 2) / 3) * 4)); + auto IndexInto = [&](size_t i) -> unsigned char { + if (i >= Bytes0Size) { + if (i >= Bytes01Size) { + return Bytes2[i - Bytes01Size]; + } + return Bytes1[i - Bytes0Size]; + } + return Bytes0[i]; + }; + + size_t i = 0, j = 0; + for (size_t n = TotalSize / 3 * 3; i < n; i += 3, j += 4) { + uint32_t x = ((unsigned char)IndexInto(i) << 16) | + ((unsigned char)IndexInto(i + 1) << 8) | + (unsigned char)IndexInto(i + 2); + OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63]; + OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63]; + OutputBuffer[IndexOffset + j + 2] = Table[(x >> 6) & 63]; + OutputBuffer[IndexOffset + j + 3] = Table[x & 63]; + } + if (i + 1 == TotalSize) { + uint32_t x = ((unsigned char)IndexInto(i) << 16); + 
OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63]; + OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63]; + OutputBuffer[IndexOffset + j + 2] = '='; + OutputBuffer[IndexOffset + j + 3] = '='; + } else if (i + 2 == TotalSize) { + uint32_t x = ((unsigned char)IndexInto(i) << 16) | + ((unsigned char)IndexInto(i + 1) << 8); + OutputBuffer[IndexOffset + j + 0] = Table[(x >> 18) & 63]; + OutputBuffer[IndexOffset + j + 1] = Table[(x >> 12) & 63]; + OutputBuffer[IndexOffset + j + 2] = Table[(x >> 6) & 63]; + OutputBuffer[IndexOffset + j + 3] = '='; + } +} + +void Preprocessor::HandleEmbedDirectiveBuiltin( + SourceLocation HashLoc, const Token &FilenameTok, + StringRef ResolvedFilename, StringRef SearchPath, StringRef RelativePath, + const LexEmbedParametersResult &Params, StringRef BinaryContents, + const size_t TargetCharWidth) { + // if it's empty, just process it like a normal expanded token stream + if (BinaryContents.empty()) { + HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params, + BinaryContents, TargetCharWidth); + return; + } + SmallVector BinaryPrefix{}; + SmallVector BinarySuffix{}; if (Params.MaybePrefixParam) { - std::copy(Params.MaybePrefixParam->begin(), Params.MaybePrefixParam->end(), - InitListTokens.get() + TokenIndex); - TokenIndex += Params.MaybePrefixParam->size(); - } - for (size_t I = 0; I < BinaryContents.size(); ++I) { - unsigned char ByteValue = BinaryContents[I]; - StringRef ByteRepresentation = IntegerLiterals[ByteValue]; - const size_t InitListIndex = TokenIndex; - Token &IntToken = InitListTokens[InitListIndex]; - IntToken.setKind(tok::numeric_constant); - IntToken.setLiteralData(ByteRepresentation.data()); - IntToken.setLength(ByteRepresentation.size()); - IntToken.setLocation(FilenameLoc); - ++TokenIndex; - bool AtEndOfContents = I == (BinaryContents.size() - 1); - if (!AtEndOfContents) { - const size_t CommaInitListIndex = InitListIndex + 1; - Token &CommaToken = InitListTokens[CommaInitListIndex]; - 
CommaToken.setKind(tok::comma); - CommaToken.setLocation(FilenameLoc); - ++TokenIndex; + // If we ahve a prefix, validate that it's a good fit for direct data + // embedded (and prepare to prepend it) + const PPEmbedParameterPrefix &PrefixParam = *Params.MaybePrefixParam; + if (!TokenListIsCharacterArray(*this, TargetCharWidth, true, + PrefixParam.Tokens, BinaryPrefix)) { + HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params, + BinaryContents, TargetCharWidth); + return; } } if (Params.MaybeSuffixParam) { - std::copy(Params.MaybeSuffixParam->begin(), Params.MaybeSuffixParam->end(), - InitListTokens.get() + TokenIndex); - TokenIndex += Params.MaybeSuffixParam->size(); + // If we ahve a prefix, validate that it's a good fit for direct data + // embedding (and prepare to append it) + const PPEmbedParameterSuffix &SuffixParam = *Params.MaybeSuffixParam; + if (!TokenListIsCharacterArray(*this, TargetCharWidth, false, + SuffixParam.Tokens, BinarySuffix)) { + HandleEmbedDirectiveNaive(HashLoc, FilenameTok.getLocation(), Params, + BinaryContents, TargetCharWidth); + return; + } } - assert(TokenIndex == InitListTokensSize); - EnterTokenStream(std::move(InitListTokens), InitListTokensSize, true, false); -} -void Preprocessor::HandleEmbedDirectiveBuiltin(SourceLocation FilenameLoc, - LexEmbedParametersResult &Params, - StringRef BinaryContents, - const size_t TargetCharWidth) { - // TODO: implement direct built-in support - HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents, - TargetCharWidth); + // Load up a new embed buffer for this file and set of parameters in + // particular. 
+ EmbedBuffers.push_back(""); + size_t EmbedBufferNumber = EmbedBuffers.size(); + std::string EmbedBufferNumberVal = std::to_string(EmbedBufferNumber); + llvm::Twine EmbedBufferName = [](const std::string &Number) { + llvm::Twine PrefixNumber = (""); + }(EmbedBufferNumberVal); + std::string &TargetEmbedBuffer = EmbedBuffers.back(); + StringRef TypeName = "unsigned char"; + const size_t TotalSize = + BinaryPrefix.size() + BinaryContents.size() + BinarySuffix.size(); + const size_t ReserveSize = // add up for necessary size: + 19 // __builtin_pp_embed( + + TypeName.size() // type-name + + 2 // ," + + ResolvedFilename.size() // file-name + + 3 // "," + + (((TotalSize + 2) / 3) * 4) // base64-string + + 2 // "); + ; + // Reserve appropriate size + TargetEmbedBuffer.reserve(ReserveSize); + + // Generate the look-alike source file + TargetEmbedBuffer.append("__builtin_pp_embed("); + TargetEmbedBuffer.append(TypeName.data(), TypeName.size()); + TargetEmbedBuffer.append(",\""); + TargetEmbedBuffer.append(ResolvedFilename.data(), ResolvedFilename.size()); + TargetEmbedBuffer.append("\",\""); + // include the prefix(...) and suffix(...) binary data in the total contents + TripleEncodeBase64( + StringRef(BinaryPrefix.data(), BinaryPrefix.size()), BinaryContents, + StringRef(BinarySuffix.data(), BinarySuffix.size()), TargetEmbedBuffer); + TargetEmbedBuffer.append("\")"); + // Create faux-file and its ID, backed by a memory buffer. + std::unique_ptr EmbedMemBuffer = + llvm::MemoryBuffer::getMemBufferCopy(TargetEmbedBuffer, EmbedBufferName); + assert(EmbedMemBuffer && "Cannot create predefined source buffer"); + FileID EmbedBufferFID = SourceMgr.createFileID(std::move(EmbedMemBuffer)); + assert(EmbedBufferFID.isValid() && + "Could not create FileID for #embed directive?"); + // Start parsing the look-alike source file for the embed directive and + // pretend everything is normal + // TODO: (Maybe? )Stop the PPCallbacks from considering this a Real File™. 
+ EnterSourceFile(EmbedBufferFID, nullptr, HashLoc, false); } void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok, const FileEntry *LookupFromFile) { if (!LangOpts.C23 || !LangOpts.CPlusPlus26) { - auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_c23_pp_embed - : diag::warn_cxx26_pp_embed); + auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_cxx26_pp_embed + : diag::warn_c23_pp_embed); Diag(EmbedTok, EitherDiag); } @@ -3952,18 +4193,16 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok, if (Callbacks && Callbacks->EmbedFileNotFound(OriginalFilename)) { return; } - Diag(FilenameTok, diag::err_pp_file_not_found) - << Filename; + Diag(FilenameTok, diag::err_pp_file_not_found) << Filename; return; } std::optional MaybeSignedLimit{}; if (Params.MaybeLimitParam) { - if (static_cast(INT64_MAX) >= *Params.MaybeLimitParam) { - MaybeSignedLimit = static_cast(*Params.MaybeLimitParam); - } + MaybeSignedLimit = static_cast(Params.MaybeLimitParam->Limit); } - llvm::ErrorOr> MaybeFile = getFileManager().getBufferForFile( - *MaybeFileRef, false, false, MaybeSignedLimit); + llvm::ErrorOr> MaybeFile = + getFileManager().getBufferForFile(*MaybeFileRef, false, false, + MaybeSignedLimit); if (!MaybeFile) { // could not find file Diag(FilenameTok, diag::err_cannot_open_file) @@ -3973,7 +4212,7 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok, StringRef BinaryContents = MaybeFile.get()->getBuffer(); if (Params.MaybeOffsetParam) { // offsets all the way to the end of the file make for an empty file. 
- const size_t OffsetParam = *Params.MaybeOffsetParam; + const size_t &OffsetParam = Params.MaybeOffsetParam->Offset; BinaryContents = BinaryContents.substr(OffsetParam); } const size_t TargetCharWidth = getTargetInfo().getCharWidth(); @@ -4009,11 +4248,12 @@ void Preprocessor::HandleEmbedDirective(SourceLocation HashLoc, Token &EmbedTok, RelativePath); } if (PPOpts->NoBuiltinPPEmbed) { - HandleEmbedDirectiveNaive(FilenameLoc, Params, BinaryContents, + HandleEmbedDirectiveNaive(HashLoc, FilenameLoc, Params, BinaryContents, TargetCharWidth); } else { // emit a token directly, handle it internally. - HandleEmbedDirectiveBuiltin(FilenameLoc, Params, BinaryContents, + HandleEmbedDirectiveBuiltin(HashLoc, FilenameTok, Filename, SearchPath, + RelativePath, Params, BinaryContents, TargetCharWidth); } } diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 6e0163ccc89b7f..7f6c964b0d68a3 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1270,8 +1270,8 @@ static bool EvaluateHasIncludeCommon(Token &Tok, IdentifierInfo *II, int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) { // pedwarn for not being on C23 if (!LangOpts.C23 || !LangOpts.CPlusPlus26) { - auto EitherDiag = (LangOpts.CPlusPlus ? diag::warn_c23_pp_has_embed - : diag::warn_cxx26_pp_has_embed); + auto EitherDiag = (LangOpts.CPlusPlus ? 
diag::warn_cxx26_pp_has_embed + : diag::warn_c23_pp_has_embed); Diag(Tok, EitherDiag); } @@ -1321,7 +1321,8 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) { SourceLocation FilenameLoc = Tok.getLocation(); Token FilenameTok = Tok; - Preprocessor::LexEmbedParametersResult Params = this->LexEmbedParameters(Tok, true, false); + Preprocessor::LexEmbedParametersResult Params = + this->LexEmbedParameters(Tok, true, false); if (!Params.Successful) { if (Tok.isNot(tok::eod)) this->DiscardUntilEndOfDirective(); @@ -1339,7 +1340,6 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) { return VALUE__STDC_EMBED_NOT_FOUND__; } - SmallString<128> FilenameBuffer; SmallString<256> RelativePath; StringRef Filename = this->getSpelling(FilenameTok, FilenameBuffer); @@ -1351,11 +1351,10 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) { assert(!Filename.empty()); const FileEntry *LookupFromFile = this->getCurrentFileLexer() ? this->getCurrentFileLexer()->getFileEntry() - : nullptr; + : nullptr; OptionalFileEntryRef MaybeFileEntry = this->LookupEmbedFile(FilenameLoc, Filename, isAngled, false, - LookupFromFile, nullptr, - &RelativePath); + LookupFromFile, nullptr, &RelativePath); if (Callbacks) { Callbacks->HasEmbed(LParenLoc, Filename, isAngled, MaybeFileEntry); } @@ -1363,11 +1362,15 @@ int Preprocessor::EvaluateHasEmbed(Token &Tok, IdentifierInfo *II) { return VALUE__STDC_EMBED_NOT_FOUND__; } size_t FileSize = MaybeFileEntry->getSize(); - if (FileSize == 0 || - (Params.MaybeLimitParam ? 
*Params.MaybeLimitParam == 0 : false)) { + if (Params.MaybeLimitParam) { + if (FileSize > Params.MaybeLimitParam->Limit) { + FileSize = Params.MaybeLimitParam->Limit; + } + } + if (FileSize == 0) { return VALUE__STDC_EMBED_EMPTY__; } - if (Params.MaybeOffsetParam && *Params.MaybeOffsetParam >= FileSize) { + if (Params.MaybeOffsetParam && Params.MaybeOffsetParam->Offset >= FileSize) { return VALUE__STDC_EMBED_EMPTY__; } return VALUE__STDC_EMBED_FOUND__; diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index ede4c51487ffbe..10eb6d268b37b1 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -1411,7 +1411,8 @@ bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, return true; } -bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { +bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value, + bool WithLex) { assert(Tok.is(tok::numeric_constant)); SmallString<8> IntegerBuffer; bool NumberInvalid = false; @@ -1426,7 +1427,8 @@ bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { llvm::APInt APVal(64, 0); if (Literal.GetIntegerValue(APVal)) return false; - Lex(Tok); + if (WithLex) + Lex(Tok); Value = APVal.getLimitedValue(); return true; } diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 9dbfc1c8c5e9ff..ef3ae580a43aeb 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -32,6 +32,7 @@ #include "clang/Sema/Scope.h" #include "clang/Sema/TypoCorrection.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Base64.h" #include using namespace clang; @@ -741,6 +742,8 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback { }; } +// clang-format off + /// Parse a cast-expression, or, if \pisUnaryExpression is true, parse /// a unary-expression. 
/// @@ -805,6 +808,7 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback { /// [MS] '__builtin_FUNCSIG' '(' ')' /// [GNU] '__builtin_LINE' '(' ')' /// [CLANG] '__builtin_COLUMN' '(' ')' +/// [CLANG] '__builtin_pp_embed' '(' type-name ',' string-literal ',' string-literal ')' /// [GNU] '__builtin_source_location' '(' ')' /// [GNU] '__builtin_types_compatible_p' '(' type-name ',' type-name ')' /// [GNU] '__null' @@ -924,6 +928,9 @@ class CastExpressionIdValidator final : public CorrectionCandidateCallback { /// '__is_rvalue_expr' /// \endverbatim /// + +// clang-format on + ExprResult Parser::ParseCastExpression(CastParseKind ParseKind, bool isAddressOfOperand, bool &NotCastExpr, @@ -1345,6 +1352,7 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind, case tok::kw___builtin_FUNCSIG: case tok::kw___builtin_LINE: case tok::kw___builtin_source_location: + case tok::kw___builtin_pp_embed: if (NotPrimaryExpression) *NotPrimaryExpression = true; // This parses the complete suffix; we can return early. 
@@ -2145,6 +2153,7 @@ Parser::ParsePostfixExpressionSuffix(ExprResult LHS) { } else { Expr *Fn = LHS.get(); SourceLocation RParLoc = Tok.getLocation(); + Actions.ModifyCallExprArguments(Fn, Loc, ArgExprs, RParLoc); LHS = Actions.ActOnCallExpr(getCurScope(), Fn, Loc, ArgExprs, RParLoc, ExecConfig); if (LHS.isInvalid()) { @@ -2560,6 +2569,8 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() { return Operand; } +// clang-format off + /// ParseBuiltinPrimaryExpression /// /// \verbatim @@ -2575,6 +2586,7 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() { /// [MS] '__builtin_FUNCSIG' '(' ')' /// [GNU] '__builtin_LINE' '(' ')' /// [CLANG] '__builtin_COLUMN' '(' ')' +/// [CLANG] '__builtin_pp_embed' '(' 'type-name ',' string-literal ',' string-literal ')' /// [GNU] '__builtin_source_location' '(' ')' /// [OCL] '__builtin_astype' '(' assignment-expression ',' type-name ')' /// @@ -2583,6 +2595,8 @@ ExprResult Parser::ParseUnaryExprOrTypeTraitExpression() { /// [GNU] offsetof-member-designator '.' 
identifier /// [GNU] offsetof-member-designator '[' expression ']' /// \endverbatim + +// clang-format on ExprResult Parser::ParseBuiltinPrimaryExpression() { ExprResult Res; const IdentifierInfo *BuiltinII = Tok.getIdentifierInfo(); @@ -2841,6 +2855,96 @@ ExprResult Parser::ParseBuiltinPrimaryExpression() { Res = Actions.ActOnSourceLocExpr(Kind, StartLoc, ConsumeParen()); break; } + case tok::kw___builtin_pp_embed: { + SourceRange DataTyExprSourceRange{}; + TypeResult DataTyExpr(ParseTypeName(&DataTyExprSourceRange)); + + if (ExpectAndConsume(tok::comma)) { + SkipUntil(tok::r_paren, StopAtSemi); + Res = ExprError(); + } + + ExprResult FilenameArgExpr(ParseStringLiteralExpression()); + + if (ExpectAndConsume(tok::comma)) { + SkipUntil(tok::r_paren, StopAtSemi); + Res = ExprError(); + } + + ExprResult Base64ArgExpr(ParseStringLiteralExpression()); + + if (Tok.isNot(tok::r_paren)) { + Diag(Tok, diag::err_expected) << tok::r_paren; + Res = ExprError(); + } + + const ASTContext &Context = Actions.getASTContext(); + QualType DataTy = Context.UnsignedCharTy; + size_t TargetWidth = Context.getTypeSize(DataTy); + if (DataTyExpr.isInvalid()) { + Res = ExprError(); + } else { + DataTy = DataTyExpr.get().get().getCanonicalType(); + TargetWidth = Context.getTypeSize(DataTy); + if (DataTy.getUnqualifiedType() != Context.UnsignedCharTy && + DataTy.getUnqualifiedType() != Context.CharTy) { + // TODO: check if is exactly the same as unsigned char + Diag(DataTyExprSourceRange.getBegin(), + diag::err_builtin_pp_embed_invalid_argument) + << "only 'char' and 'unsigned char' are supported"; + Res = ExprError(); + } + if ((TargetWidth % CHAR_BIT) != 0) { + Diag(DataTyExprSourceRange.getBegin(), + diag::err_builtin_pp_embed_invalid_argument) + << "width of element type is not a multiple of host platform's " + "CHAR_BIT!"; + Res = ExprError(); + } + } + + StringLiteral *FilenameLiteral = nullptr; + if (FilenameArgExpr.isInvalid()) { + Res = ExprError(); + } else { + FilenameLiteral = 
FilenameArgExpr.getAs(); + } + + std::vector BinaryData{}; + if (Base64ArgExpr.isInvalid()) { + Res = ExprError(); + } else { + StringLiteral *Base64Str = Base64ArgExpr.getAs(); + StringRef Base64StrData = Base64Str->getBytes(); + if (Base64Str->getKind() != StringLiteral::Ordinary) { + Diag(Base64Str->getExprLoc(), diag::err_expected_string_literal) + << 0 + << "'__builtin_pp_embed' with valid base64 encoding that is an " + "ordinary \"...\" string"; + } + const auto OnDecodeError = [&](const llvm::ErrorInfoBase &) { + Diag(Base64Str->getExprLoc(), + diag::err_builtin_pp_embed_invalid_argument) + << "expected a valid base64 encoded string"; + }; + llvm::Error Err = llvm::decodeBase64(Base64Str->getBytes(), BinaryData); + llvm::handleAllErrors(std::move(Err), OnDecodeError); + if (((BinaryData.size() * CHAR_BIT) % TargetWidth) != 0) { + Diag(DataTyExprSourceRange.getBegin(), + diag::err_builtin_pp_embed_invalid_argument) + << "size of data does not split evently into the number of bytes " + "requested"; + Res = ExprError(); + } + } + + if (!Res.isInvalid()) { + Res = Actions.ActOnPPEmbedExpr( + StartLoc, Base64ArgExpr.get()->getExprLoc(), ConsumeParen(), + FilenameLiteral, DataTy, std::move(BinaryData)); + } + break; + } } if (Res.isInvalid()) diff --git a/clang/lib/Parse/ParseTemplate.cpp b/clang/lib/Parse/ParseTemplate.cpp index f556d0e6d4f8b6..8364519861fe4f 100644 --- a/clang/lib/Parse/ParseTemplate.cpp +++ b/clang/lib/Parse/ParseTemplate.cpp @@ -1671,6 +1671,8 @@ bool Parser::ParseTemplateArgumentList(TemplateArgList &TemplateArgs, // arguments. 
} while (TryConsumeToken(tok::comma)); + Actions.ModifyTemplateArguments(Template, TemplateArgs); + return false; } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index f249d41bc9bfbb..44d8ddba080d82 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -13336,6 +13336,54 @@ void Sema::AddInitializerToDecl(Decl *RealDecl, Expr *Init, bool DirectInit) { return; } + // Adjust the init expression for PPEmbedExpr as early as possible + // here. + bool AlreadyAdjustedPPEmbedExpr = false; + if (InitListExpr *ILExpr = dyn_cast_if_present(Init); ILExpr) { + QualType VDeclTy = VDecl->getType(); + ArrayRef Inits = ILExpr->inits(); + if (CheckExprListForPPEmbedExpr(Inits, VDeclTy) == PPEmbedExpr::FoundOne) { + PPEmbedExpr *PPEmbed = dyn_cast_if_present(Inits[0]); + ILExpr->setInit(0, PPEmbed->getDataStringLiteral()); + AlreadyAdjustedPPEmbedExpr = true; + } + } + + if (!AlreadyAdjustedPPEmbedExpr) { + // If there is a PPEmbedExpr as a single initializer without braces, + // make sure it only produces a single element (and then expand said + // element). + if (PPEmbedExpr *PPEmbed = dyn_cast_if_present(Init); + PPEmbed) { + if (PPEmbed->getDataElementCount(Context) == 1) { + // Expand the list in-place immediately, let the natural work take hold + Init = ExpandSinglePPEmbedExpr(PPEmbed); + } else { + // `__builtin_pp_embed( ... )` only produces 2 or more values. + Diag(RealDecl->getLocation(), diag::err_illegal_initializer_type) + << "'__builtin_pp_embed'"; + RealDecl->setInvalidDecl(); + return; + } + } + + // Legitimately, in all other cases, COMPLETELY nuke the PPEmbedExpr + // and turn it into a list of integers where applicable. 
+ if (InitListExpr *ILExpr = dyn_cast_if_present(Init); + ILExpr) { + ArrayRef Inits = ILExpr->inits(); + SmallVector OutputExprList{}; + if (ExpandPPEmbedExprInExprList(Inits, OutputExprList, false) == + PPEmbedExpr::Expanded) { + ILExpr->resizeInits(Context, OutputExprList.size()); + for (size_t I = 0; I < OutputExprList.size(); ++I) { + auto &InitExpr = OutputExprList[I]; + ILExpr->setInit(I, InitExpr); + } + } + } + } + // WebAssembly tables can't be used to initialise a variable. if (Init && !Init->getType().isNull() && Init->getType()->isWebAssemblyTableType()) { diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index f9c010b1a00248..37321d2417a7d2 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -17022,7 +17022,8 @@ Decl *Sema::ActOnStaticAssertDeclaration(SourceLocation StaticAssertLoc, SourceLocation RParenLoc) { if (DiagnoseUnexpandedParameterPack(AssertExpr, UPPC_StaticAssertExpression)) return nullptr; - + if (DiagnosePPEmbedExpr(AssertExpr, StaticAssertLoc, PPEEC_StaticAssert)) + return nullptr; return BuildStaticAssertDeclaration(StaticAssertLoc, AssertExpr, AssertMessageExpr, RParenLoc, false); } diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 75730ea888afb4..ebeed7f4d2b485 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1412,6 +1412,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Expr::SizeOfPackExprClass: case Expr::StringLiteralClass: case Expr::SourceLocExprClass: + case Expr::PPEmbedExprClass: case Expr::ConceptSpecializationExprClass: case Expr::RequiresExprClass: // These expressions can never throw. 
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index cf45fc388083ce..c10e6501daef6e 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -7110,6 +7110,13 @@ static void DiagnosedUnqualifiedCallsToStdFunctions(Sema &S, << FixItHint::CreateInsertion(DRE->getLocation(), "std::"); } +void Sema::ModifyCallExprArguments(Expr *Fn, SourceLocation LParenLoc, + SmallVectorImpl &ArgExprs, + SourceLocation RParenLoc) { + [[maybe_unused]] PPEmbedExpr::Action Action = + ExpandPPEmbedExprInExprList(ArgExprs); +} + ExprResult Sema::ActOnCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc, MultiExprArg ArgExprs, SourceLocation RParenLoc, Expr *ExecConfig) { @@ -7947,8 +7954,17 @@ Sema::BuildInitList(SourceLocation LBraceLoc, MultiExprArg InitArgList, } } - InitListExpr *E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList, - RBraceLoc); + InitListExpr *E = nullptr; + if (InitArgList.size() > 1 && + CheckExprListForPPEmbedExpr(InitArgList, std::nullopt) != + PPEmbedExpr::NotFound) { + SmallVector OutputExprList; + ExpandPPEmbedExprInExprList(InitArgList, OutputExprList); + E = new (Context) + InitListExpr(Context, LBraceLoc, OutputExprList, RBraceLoc); + } else { + E = new (Context) InitListExpr(Context, LBraceLoc, InitArgList, RBraceLoc); + } E->setType(Context.VoidTy); // FIXME: just a place holder for now. 
return E; } @@ -17570,6 +17586,225 @@ ExprResult Sema::BuildSourceLocExpr(SourceLocExpr::IdentKind Kind, SourceLocExpr(Context, Kind, ResultTy, BuiltinLoc, RPLoc, ParentContext); } +ExprResult Sema::ActOnPPEmbedExpr(SourceLocation BuiltinLoc, + SourceLocation Base64DataLocation, + SourceLocation RPLoc, StringLiteral *Filename, + QualType ElementTy, + std::vector BinaryData) { + uint64_t ArraySizeRawVal[] = {BinaryData.size()}; + llvm::APSInt ArraySize(llvm::APInt(Context.getTypeSize(Context.getSizeType()), + 1, ArraySizeRawVal)); + QualType ArrayTy = Context.getConstantArrayType(ElementTy, ArraySize, nullptr, + ArrayType::Normal, 0); + StringLiteral *BinaryDataLiteral = StringLiteral::Create( + Context, StringRef(BinaryData.data(), BinaryData.size()), + StringLiteral::Ordinary, false, ArrayTy, Base64DataLocation); + return new (Context) + PPEmbedExpr(Context, ElementTy, Filename, BinaryDataLiteral, BuiltinLoc, + RPLoc, CurContext); +} + +IntegerLiteral *Sema::ExpandSinglePPEmbedExpr(PPEmbedExpr *PPEmbed) { + assert(PPEmbed->getDataElementCount(Context) == 1 && + "Data should only contain a single element"); + StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral(); + QualType ElementTy = PPEmbed->getType(); + const size_t TargetWidth = Context.getTypeSize(ElementTy); + const size_t BytesPerElement = CHAR_BIT / TargetWidth; + StringRef Data = DataLiteral->getBytes(); + SmallVector ByteVals{}; + for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) { + if ((ValIndex % sizeof(uint64_t)) == 0) { + ByteVals.push_back(0); + } + const unsigned char DataByte = Data[ValIndex]; + ByteVals.back() |= + (static_cast(DataByte) << (ValIndex * CHAR_BIT)); + } + ArrayRef ByteValsRef(ByteVals); + return IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteValsRef), + ElementTy, DataLiteral->getBeginLoc()); +} + +PPEmbedExpr::Action +Sema::CheckExprListForPPEmbedExpr(ArrayRef ExprList, + std::optional MaybeInitType) { + if (ExprList.empty()) { + return 
PPEmbedExpr::NotFound; + } + PPEmbedExpr *First = ExprList.size() == 1 + ? dyn_cast_if_present(ExprList[0]) + : nullptr; + if (First) { + // only one and it's an embed + if (MaybeInitType) { + // With the type information, we have a duty to check if it matches; + // if not, explode it out into a list of integer literals. + QualType &InitType = *MaybeInitType; + if (InitType->isArrayType()) { + const ArrayType *InitArrayType = InitType->getAsArrayTypeUnsafe(); + QualType InitElementTy = InitArrayType->getElementType(); + QualType PPEmbedExprElementTy = First->getType(); + const bool TypesMatch = + Context.typesAreCompatible(InitElementTy, PPEmbedExprElementTy) || + (InitElementTy->isCharType() && PPEmbedExprElementTy->isCharType()); + if (TypesMatch) { + // Keep the PPEmbedExpr, report that everything has been found. + return PPEmbedExpr::FoundOne; + } + } + } else { + // leave it, possibly adjusted later! + return PPEmbedExpr::FoundOne; + } + } + if (std::find_if(ExprList.begin(), ExprList.end(), + [](const Expr *const SomeExpr) { + return isa(SomeExpr); + }) == ExprList.end()) { + // We didn't find one. + return PPEmbedExpr::NotFound; + } + // Otherwise, we found one but it is not the sole entry in the initialization + // list. + return PPEmbedExpr::Expanded; +} + +PPEmbedExpr::Action +Sema::ExpandPPEmbedExprInExprList(SmallVectorImpl &ExprList) { + PPEmbedExpr::Action Action = PPEmbedExpr::NotFound; + SmallVector ByteVals{}; + for (size_t I = 0; I < ExprList.size();) { + Expr *&OriginalExpr = ExprList[I]; + PPEmbedExpr *PPEmbed = dyn_cast_if_present(OriginalExpr); + if (!PPEmbed) { + ++I; + continue; + } + auto ExprListIt = ExprList.erase(&OriginalExpr); + const size_t ExpectedDataElements = PPEmbed->getDataElementCount(Context); + if (ExpectedDataElements == 0) { + // No ++I, we are already pointing to newest element. 
+ continue; + } + Action = PPEmbedExpr::Expanded; + StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral(); + QualType ElementTy = PPEmbed->getType(); + const size_t TargetWidth = Context.getTypeSize(ElementTy); + const size_t BytesPerElement = CHAR_BIT / TargetWidth; + StringRef Data = DataLiteral->getBytes(); + size_t Insertions = 0; + for (size_t ByteIndex = 0; ByteIndex < Data.size(); + ByteIndex += BytesPerElement) { + ByteVals.clear(); + for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) { + if ((ValIndex % sizeof(uint64_t)) == 0) { + ByteVals.push_back(0); + } + const unsigned char DataByte = Data[ByteIndex + ValIndex]; + ByteVals.back() |= + (static_cast(DataByte) << (ValIndex * CHAR_BIT)); + } + ArrayRef ByteValsRef(ByteVals); + IntegerLiteral *IntLit = + IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteValsRef), + ElementTy, DataLiteral->getBeginLoc()); + ExprListIt = ExprList.insert(ExprListIt, IntLit); + ++Insertions; + // make sure we are inserting **after** the item we just inserted, not + // before + ++ExprListIt; + } + assert(Insertions == ExpectedDataElements); + I += Insertions; + } + return PPEmbedExpr::Expanded; +} + +PPEmbedExpr::Action +Sema::ExpandPPEmbedExprInExprList(ArrayRef ExprList, + SmallVectorImpl &OutputExprList, + bool ClearOutputFirst) { + if (ClearOutputFirst) { + OutputExprList.clear(); + } + size_t ExpectedResize = OutputExprList.size() + ExprList.size(); + const auto FindPPEmbedExpr = [](const Expr *const SomeExpr) { + return isa(SomeExpr); + }; + if (std::find_if(ExprList.begin(), ExprList.end(), FindPPEmbedExpr) == + ExprList.end()) { + return PPEmbedExpr::NotFound; + } + SmallVector ByteVals{}; + OutputExprList.reserve(ExpectedResize); + for (size_t I = 0; I < ExprList.size(); ++I) { + Expr *OriginalExpr = ExprList[I]; + PPEmbedExpr *PPEmbed = dyn_cast_if_present(OriginalExpr); + if (!PPEmbed) { + OutputExprList.push_back(OriginalExpr); + continue; + } + StringLiteral *DataLiteral = 
PPEmbed->getDataStringLiteral(); + QualType ElementTy = PPEmbed->getType(); + const size_t TargetWidth = Context.getTypeSize(ElementTy); + const size_t BytesPerElement = CHAR_BIT / TargetWidth; + StringRef Data = DataLiteral->getBytes(); + for (size_t ByteIndex = 0; ByteIndex < Data.size(); + ByteIndex += BytesPerElement) { + ByteVals.clear(); + for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) { + if ((ValIndex % sizeof(uint64_t)) == 0) { + ByteVals.push_back(0); + } + const unsigned char DataByte = Data[ByteIndex + ValIndex]; + ByteVals.back() |= + (static_cast(DataByte) << (ValIndex * CHAR_BIT)); + } + ArrayRef ByteValsRef(ByteVals); + IntegerLiteral *IntLit = + IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteValsRef), + ElementTy, DataLiteral->getBeginLoc()); + OutputExprList.push_back(IntLit); + } + } + return PPEmbedExpr::Expanded; +} + +StringRef Sema::GetLocationName(PPEmbedExprContext Context) const { + switch (Context) { + default: + llvm_unreachable("unhandled PPEmbedExprContext value"); + case PPEEC__StaticAssert: + return "_Static_assert"; + case PPEEC_StaticAssert: + return "static_assert"; + } +} + +bool Sema::DiagnosePPEmbedExpr(Expr *&E, SourceLocation ContextLocation, + PPEmbedExprContext PPEmbedContext, + bool SingleAllowed) { + PPEmbedExpr *PPEmbed = dyn_cast_if_present(E); + if (!PPEmbed) + return true; + + if (SingleAllowed && PPEmbed->getDataElementCount(Context) == 1) { + E = ExpandSinglePPEmbedExpr(PPEmbed); + return true; + } + + StringRef LocationName = GetLocationName(PPEmbedContext); + StringRef DiagnosticMessage = + (SingleAllowed ? 
"cannot use a preprocessor embed that expands to " + "nothing or expands to " + "more than one item in " + : "cannot use a preprocessor embed in "); + Diag(ContextLocation, diag::err_builtin_pp_embed_invalid_location) + << DiagnosticMessage << 1 << LocationName; + return false; +} + bool Sema::CheckConversionToObjCLiteral(QualType DstType, Expr *&Exp, bool Diagnose) { if (!getLangOpts().ObjC) diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index ff370dd1e080b2..234e678c71b140 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -1623,6 +1623,62 @@ NamedDecl *Sema::ActOnNonTypeTemplateParameter(Scope *S, Declarator &D, return Param; } +void Sema::ModifyTemplateArguments( + const TemplateTy &Template, + SmallVectorImpl &TemplateArgs) { + SmallVector ByteVals{}; + for (size_t I = 0; I < TemplateArgs.size();) { + ParsedTemplateArgument &OriginalArg = TemplateArgs[I]; + if (OriginalArg.getKind() != ParsedTemplateArgument::NonType) { + ++I; + continue; + } + PPEmbedExpr *PPEmbed = dyn_cast(OriginalArg.getAsExpr()); + if (!PPEmbed) { + ++I; + continue; + } + auto TemplateArgListIt = TemplateArgs.erase(&OriginalArg); + const size_t ExpectedDataElements = PPEmbed->getDataElementCount(Context); + if (ExpectedDataElements == 0) { + // No ++I; already pointing at the right element! 
+ continue; + } + StringLiteral *DataLiteral = PPEmbed->getDataStringLiteral(); + QualType ElementTy = PPEmbed->getType(); + const size_t TargetWidth = Context.getTypeSize(ElementTy); + const size_t BytesPerElement = CHAR_BIT / TargetWidth; + StringRef Data = DataLiteral->getBytes(); + size_t Insertions = 0; + for (size_t ByteIndex = 0; ByteIndex < Data.size(); + ByteIndex += BytesPerElement) { + ByteVals.clear(); + for (size_t ValIndex = 0; ValIndex < BytesPerElement; ++ValIndex) { + if ((ValIndex % sizeof(uint64_t)) == 0) { + ByteVals.push_back(0); + } + const unsigned char DataByte = Data[ByteIndex + ValIndex]; + ByteVals.back() |= + (static_cast(DataByte) << (ValIndex * CHAR_BIT)); + } + ArrayRef ByteValsRef(ByteVals); + IntegerLiteral *IntLit = + IntegerLiteral::Create(Context, llvm::APInt(TargetWidth, ByteValsRef), + ElementTy, DataLiteral->getBeginLoc()); + TemplateArgListIt = TemplateArgs.insert( + TemplateArgListIt, + ParsedTemplateArgument(ParsedTemplateArgument::NonType, IntLit, + OriginalArg.getLocation())); + ++Insertions; + // make sure we are inserting **after** the item we just inserted, not + // before + ++TemplateArgListIt; + } + assert(Insertions == ExpectedDataElements); + I += Insertions; + } +} + /// ActOnTemplateTemplateParameter - Called when a C++ template template /// parameter (e.g. T in template