Skip to content

Commit

Permalink
[idl_parser] Track included files by hash
Browse files Browse the repository at this point in the history
Parser::included_files is a map whose main purpose is to keep track of
which files have already been parsed in order to protect against
multiple inclusion. Its key is the path that the file was found at
during parsing (or, if it's an in-memory file, just its name).

This commit changes the key to be the 64-bit FNV-1a hash of the file's
name (just the name, not the complete path) XORed with the hash of the
file's contents (unless it's an in-memory file, in which case only the
name is hashed).

This allows multiple-inclusion protection to function even when each file
is reached through its own unique include path (fixes #6425).
  • Loading branch information
mmmspatz committed Feb 1, 2021
1 parent 63f2adf commit 6634953
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 10 deletions.
2 changes: 1 addition & 1 deletion include/flatbuffers/idl.h
Original file line number Diff line number Diff line change
Expand Up @@ -993,7 +993,7 @@ class Parser : public ParserState {
std::string file_identifier_;
std::string file_extension_;

std::map<std::string, std::string> included_files_;
std::map<uint64_t, std::string> included_files_;
std::map<std::string, std::set<std::string>> files_included_per_file_;
std::vector<std::string> native_included_files_;

Expand Down
36 changes: 27 additions & 9 deletions src/idl_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3153,13 +3153,29 @@ CheckedError Parser::ParseRoot(const char *source, const char **include_paths,
return NoError();
}

// Generate a unique hash for a file based on its name and contents (if any).
//
// The name contribution is the 64-bit FNV-1a hash of `source_filename` after
// it has been passed through StripPath, so the same file reached through
// different directories contributes the same name hash. When `source` is
// non-null and non-empty, the FNV-1a hash of the contents is XORed in.
//
// source_filename: file name or path; may be null, in which case the name
//                  contributes nothing to the hash.
// source:          NUL-terminated file contents; pass nullptr (or an empty
//                  string) to hash the name only, e.g. for in-memory files.
// Returns the combined hash, or 0 when there is nothing to hash.
static uint64_t HashFile(const char *source_filename, const char *source) {
  uint64_t hash = 0;

  // Never hand a null name to StripPath: building a string from a null
  // pointer is undefined behavior.
  if (source_filename)
    hash = HashFnv1a<uint64_t>(StripPath(source_filename).c_str());

  // Only mix in the contents when there actually are some.
  if (source && *source) hash ^= HashFnv1a<uint64_t>(source);

  return hash;
}

CheckedError Parser::DoParse(const char *source, const char **include_paths,
const char *source_filename,
const char *include_filename) {
uint64_t source_hash = 0;
if (source_filename) {
if (included_files_.find(source_filename) == included_files_.end()) {
included_files_[source_filename] =
include_filename ? include_filename : "";
// If the file is in-memory, don't include its contents in the hash as we
// won't be able to load them later.
if (FileExists(source_filename))
source_hash = HashFile(source_filename, source);
else
source_hash = HashFile(source_filename, nullptr);

if (included_files_.find(source_hash) == included_files_.end()) {
included_files_[source_hash] = include_filename ? include_filename : "";
files_included_per_file_[source_filename] = std::set<std::string>();
} else {
return NoError();
Expand Down Expand Up @@ -3210,12 +3226,14 @@ CheckedError Parser::DoParse(const char *source, const char **include_paths,
return Error("unable to locate include file: " + name);
if (source_filename)
files_included_per_file_[source_filename].insert(filepath);
if (included_files_.find(filepath) == included_files_.end()) {

std::string contents;
bool file_loaded = LoadFile(filepath.c_str(), true, &contents);
if (included_files_.find(HashFile(filepath.c_str(), contents.c_str())) ==
included_files_.end()) {
// We found an include file that we have not parsed yet.
// Load it and parse it.
std::string contents;
if (!LoadFile(filepath.c_str(), true, &contents))
return Error("unable to load include file: " + name);
// Parse it.
if (!file_loaded) return Error("unable to load include file: " + name);
ECHECK(DoParse(contents.c_str(), include_paths, filepath.c_str(),
name.c_str()));
// We generally do not want to output code for any included files:
Expand All @@ -3232,7 +3250,7 @@ CheckedError Parser::DoParse(const char *source, const char **include_paths,
// entered into included_files_.
// This is recursive, but only go as deep as the number of include
// statements.
if (source_filename) { included_files_.erase(source_filename); }
included_files_.erase(source_hash);
return DoParse(source, include_paths, source_filename,
include_filename);
}
Expand Down

0 comments on commit 6634953

Please sign in to comment.