Skip to content

Commit

Permalink
feat(verifier): warn on and allow specifying default file corpora (ky…
Browse files Browse the repository at this point in the history
…the#5933)

* feat(verifier): warn on and allow specifying default file corpora

This PR adds a flag to specify default file vname corpus values
for anchor goals. These are chiefly useful when providing files
as additional input to the verifier with --file_vnames on (the
default). It's meant to help migrate indexers and test harnesses
that set file corpora in anchors but do not also provide them
for the corresponding file nodes (which happens surprisingly often),
causing tests to fail when anchor corpora are checked (as happens
in the fast solver, but has been "relaxed" in the old solver for
a very long time).
  • Loading branch information
zrlk authored Nov 16, 2023
1 parent 5ec4af3 commit 3c922e0
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 7 deletions.
45 changes: 38 additions & 7 deletions kythe/cxx/verifier/verifier.cc
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,10 @@ Verifier::Verifier(bool trace_lex, bool trace_parse)
builtin_location_.initialize(&builtin_location_name_);
builtin_location_.begin.column = 1;
builtin_location_.end.column = 1;
empty_string_id_ = IdentifierFor(builtin_location_, "");
auto* empty_string = IdentifierFor(builtin_location_, "");
empty_string_id_ = empty_string;
empty_string_sym_ = empty_string->symbol();
default_file_corpus_ = empty_string;
fact_id_ = IdentifierFor(builtin_location_, "fact");
vname_id_ = IdentifierFor(builtin_location_, "vname");
kind_id_ = IdentifierFor(builtin_location_, "/kythe/node/kind");
Expand Down Expand Up @@ -1137,6 +1140,32 @@ Verifier::InternedVName Verifier::InternVName(AstNode* node) {
tuple->element(4)->AsIdentifier()->symbol()};
}

AstNode* Verifier::FixFileVName(AstNode* node) {
if (auto* app = node->AsApp()) {
if (auto* tuple = app->rhs()->AsTuple(); tuple->size() == 5) {
if (auto* corpus_id = tuple->element(1)->AsIdentifier()) {
if (corpus_id->symbol() == empty_string_sym_) {
if (auto* path_id = tuple->element(3)->AsIdentifier()) {
fprintf(stderr,
"Warning: file '%s' is missing a corpus in its VName.\n",
symbol_table_.text(path_id->symbol()).c_str());
}
AstNode** values = (AstNode**)arena_.New(sizeof(AstNode*) * 5);
values[0] = tuple->element(0);
values[1] = default_file_corpus_;
values[2] = tuple->element(2);
values[3] = tuple->element(3);
values[4] = tuple->element(4);
AstNode* new_tuple =
new (&arena_) Tuple(builtin_location_, 5, values);
return new (&arena_) App(vname_id_, new_tuple);
}
}
}
}
return node;
}

bool Verifier::ProcessFactTupleForFastSolver(Tuple* tuple) {
// TODO(zarko): None of the text processing supports non-UTF8 encoded files.
if (tuple->element(1) == empty_string_id_ &&
Expand All @@ -1147,10 +1176,11 @@ bool Verifier::ProcessFactTupleForFastSolver(Tuple* tuple) {
auto sym = fast_solver_files_.insert({vname, known_file_sym_});
if (!sym.second && sym.first->second != known_not_file_sym_) {
if (assertions_from_file_nodes_) {
return LoadInMemoryRuleFile("", tuple->element(0),
return LoadInMemoryRuleFile("", FixFileVName(tuple->element(0)),
sym.first->second);
} else {
content_to_vname_[sym.first->second] = tuple->element(0);
content_to_vname_[sym.first->second] =
FixFileVName(tuple->element(0));
}
}
} else {
Expand All @@ -1162,9 +1192,10 @@ bool Verifier::ProcessFactTupleForFastSolver(Tuple* tuple) {
auto file = fast_solver_files_.insert({vname, content});
if (!file.second && file.first->second == known_file_sym_) {
if (assertions_from_file_nodes_) {
return LoadInMemoryRuleFile("", tuple->element(0), content);
return LoadInMemoryRuleFile("", FixFileVName(tuple->element(0)),
content);
} else {
content_to_vname_[content] = tuple->element(0);
content_to_vname_[content] = FixFileVName(tuple->element(0));
}
}
}
Expand Down Expand Up @@ -1221,13 +1252,13 @@ bool Verifier::PrepareDatabase() {
if (EncodedVNameOrIdentEqualTo(last_file_vname, tb->element(0))) {
if (assertions_from_file_nodes_) {
if (!LoadInMemoryRuleFile(
"", tb->element(0),
"", FixFileVName(tb->element(0)),
tb->element(4)->AsIdentifier()->symbol())) {
is_ok = false;
}
} else {
content_to_vname_[tb->element(4)->AsIdentifier()->symbol()] =
tb->element(0);
FixFileVName(tb->element(0));
}
}
last_file_vname = nullptr;
Expand Down
15 changes: 15 additions & 0 deletions kythe/cxx/verifier/verifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,13 +234,22 @@ class Verifier {
/// will no longer have access to the internal AST.
std::string InspectionString(const Inspection& i);

/// \brief Use `corpus` for file nodes without a corpus set.
void UseDefaultFileCorpus(const std::string& corpus) {
default_file_corpus_ = IdentifierFor(builtin_location_, corpus);
}

private:
using InternedVName = std::tuple<Symbol, Symbol, Symbol, Symbol, Symbol>;

/// \brief Interns an AST node known to be a VName.
/// \param node the node to intern.
InternedVName InternVName(AstNode* node);

/// \return a new vname with its corpus filled with the default file corpus
/// if `node` is a vname without a corpus set; otherwise `node`.
AstNode* FixFileVName(AstNode* node);

/// \brief Generate a VName that will not conflict with any other VName.
AstNode* NewUniqueVName(const yy::location& loc);

Expand Down Expand Up @@ -497,6 +506,12 @@ class Verifier {

/// Maps VNames to known_file_sym_, known_not_file_sym_, or file text.
absl::flat_hash_map<InternedVName, Symbol> fast_solver_files_;

/// File corpus to use if none is set on a file node.
AstNode* default_file_corpus_;

/// The symbol for the empty string.
Symbol empty_string_sym_;
};

} // namespace verifier
Expand Down
8 changes: 8 additions & 0 deletions kythe/cxx/verifier/verifier_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ ABSL_FLAG(bool, use_fast_solver, false,
"SUPPORTED.");
ABSL_FLAG(bool, print_timing_information, false,
"Print timing information for profiling.");
ABSL_FLAG(std::string, default_file_corpus, "",
"Use this corpus for anchor goals for file nodes without a corpus "
"set. In the future, if this flag is left empty, these file nodes "
"will raise an error.");

namespace {
// The fast solver needs extra stack space for modest programs.
Expand Down Expand Up @@ -153,6 +157,10 @@ invocation and rule syntax.
v.ShowFactPrefix();
}

if (!absl::GetFlag(FLAGS_default_file_corpus).empty()) {
v.UseDefaultFileCorpus(absl::GetFlag(FLAGS_default_file_corpus));
}

v.UseFastSolver(absl::GetFlag(FLAGS_use_fast_solver));

if (absl::GetFlag(FLAGS_use_fast_solver)) {
Expand Down
33 changes: 33 additions & 0 deletions kythe/cxx/verifier/verifier_unit_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2592,6 +2592,39 @@ fact_value: "//- A.node/kind file\n"
ASSERT_TRUE(v.VerifyAllGoals());
}

TEST_P(VerifierTest, ReadGoalsFromFileNodeEmptyCorpusSuccess) {
v.UseFileNodes();
v.UseDefaultFileCorpus("testcorpus");
ASSERT_TRUE(v.LoadInlineProtoFile(R"(entries {
source { path:"test" }
fact_name: "/kythe/node/kind"
fact_value: "file"
}
entries {
source { path:"test" }
fact_name: "/kythe/text"
fact_value: "//- @a.node/kind anchor\na"
}
entries {
source { signature: "a" corpus:"testcorpus" path:"test" }
fact_name: "/kythe/node/kind"
fact_value: "anchor"
}
entries {
source { signature: "a" corpus:"testcorpus" path:"test" }
fact_name: "/kythe/loc/start"
fact_value: "24"
}
entries {
source { signature: "a" corpus:"testcorpus" path:"test" }
fact_name: "/kythe/loc/end"
fact_value: "25"
}
)"));
ASSERT_TRUE(v.PrepareDatabase());
ASSERT_TRUE(v.VerifyAllGoals());
}

TEST_P(VerifierTest, ReadGoalsFromFileNodeFailParse) {
v.UseFileNodes();
bool parsed = v.LoadInlineProtoFile(R"(entries {
Expand Down

0 comments on commit 3c922e0

Please sign in to comment.