Skip to content

Commit

Permalink
opt3: deduplicate_primitives (#222)
Browse files Browse the repository at this point in the history
  • Loading branch information
msm-cert authored Oct 1, 2024
1 parent 3ffacf7 commit 18a45d8
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 2 deletions.
8 changes: 7 additions & 1 deletion libursa/Query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,16 @@ const QString &Query::as_value() const {
if (type != QueryType::PRIMITIVE) {
throw std::runtime_error("This query doesn\'t have any value.");
}

return value;
}

// Returns a copy of the ngram stored in this query.
// Only PRIMITIVE queries carry a ngram; for every other query type
// a std::runtime_error is thrown instead.
PrimitiveQuery Query::as_ngram() const {
    if (type == QueryType::PRIMITIVE) {
        return ngram;
    }
    throw std::runtime_error("This query doesn\'t contain a ngram.");
}

std::string Query::as_string_repr() const {
std::string out = "";
if (value.empty()) {
Expand Down
1 change: 1 addition & 0 deletions libursa/Query.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class Query {
std::vector<Query> &as_queries();
const QString &as_value() const;
uint32_t as_count() const;
// Returns the ngram held by a PRIMITIVE query (by value).
// Throws std::runtime_error when the query is of any other type.
PrimitiveQuery as_ngram() const;
std::string as_string_repr() const;
const QueryType &get_type() const;
bool operator==(const Query &other) const;
Expand Down
24 changes: 24 additions & 0 deletions libursa/QueryOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,29 @@ Query inline_suboperations(Query &&q, bool *changed) {
return std::move(Query(q.get_type(), std::move(newqueries)));
}

// This optimization gets rid of duplicated primitive queries.
// AND(a, a, a, a, b, b) == AND(a, b)
// This also applies to OR(), but it'll happen very rarely.
//
// Only the direct PRIMITIVE children of an AND/OR node are deduplicated,
// keyed by their ngram. Non-primitive children are passed through untouched,
// and queries of any other type are returned as-is. The first occurrence of
// each ngram is kept, so the relative order of surviving children is
// preserved.
//
// `changed` is set to true when at least one duplicate was dropped; it is
// never reset to false here.
Query deduplicate_primitives(Query &&q, bool *changed) {
    if (q.get_type() != QueryType::AND && q.get_type() != QueryType::OR) {
        return std::move(q);
    }

    auto &children = q.as_queries();

    std::set<PrimitiveQuery> seen;
    std::vector<Query> newqueries;
    newqueries.reserve(children.size());
    for (auto &&query : children) {
        if (query.get_type() != QueryType::PRIMITIVE) {
            newqueries.emplace_back(std::move(query));
        } else if (seen.insert(query.as_ngram()).second) {
            // The ngram is recorded *before* the query is moved away, so the
            // moved-from object is never read. insert().second also tells us
            // whether the ngram is new with a single set lookup.
            newqueries.emplace_back(std::move(query));
        } else {
            *changed = true;
        }
    }
    // Return the prvalue directly: wrapping it in std::move would only
    // inhibit copy elision.
    return Query(q.get_type(), std::move(newqueries));
}

Query q_optimize(Query &&q) {
if (q.get_type() == QueryType::PRIMITIVE) {
// Nothing to improve here.
Expand All @@ -65,6 +88,7 @@ Query q_optimize(Query &&q) {
changed = false;
q = flatten_trivial_operations(std::move(q), &changed);
q = inline_suboperations(std::move(q), &changed);
q = deduplicate_primitives(std::move(q), &changed);
}

return std::move(q);
Expand Down
2 changes: 1 addition & 1 deletion libursa/Version.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ constexpr std::string_view ursadb_format_version = "1.5.0";
// Project version.
// Consider updating the version tag when doing PRs.
// clang-format off
constexpr std::string_view ursadb_version_string = "@PROJECT_VERSION@+opt2";
constexpr std::string_view ursadb_version_string = "@PROJECT_VERSION@+opt3";
// clang-format on

0 comments on commit 18a45d8

Please sign in to comment.