Skip to content

Commit

Permalink
Create the optimizer framework (#219)
Browse files Browse the repository at this point in the history
Create the optimizer framework
  • Loading branch information
msm-cert authored Oct 1, 2024
1 parent 9f2a149 commit 8467f9f
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 5 deletions.
2 changes: 2 additions & 0 deletions libursa/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ add_library(
QueryParser.h
QueryResult.cpp
QueryResult.h
QueryOptimizer.cpp
QueryOptimizer.h
RawFile.cpp
RawFile.h
Responses.cpp
Expand Down
4 changes: 3 additions & 1 deletion libursa/OnDiskDataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "DatabaseName.h"
#include "Json.h"
#include "Query.h"
#include "QueryOptimizer.h"
#include "spdlog/fmt/ostr.h"
#include "spdlog/spdlog.h"

Expand Down Expand Up @@ -91,7 +92,8 @@ void OnDiskDataset::execute(const Query &query, ResultWriter *out,
for (const auto &ndx : get_indexes()) {
types_to_query.emplace(ndx.index_type());
}
const Query plan = query.plan(types_to_query);
Query plan = query.plan(types_to_query);
plan = q_optimize(std::move(plan));
spdlog::debug("PLAN: {}", plan);

QueryResult result = this->query(plan, counters);
Expand Down
7 changes: 7 additions & 0 deletions libursa/Query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,14 @@ const std::vector<Query> &Query::as_queries() const {
type != QueryType::MIN_OF) {
throw std::runtime_error("This query doesn\'t contain subqueries.");
}
return queries;
}

// Mutable access to the subqueries of a compound query.
// Only AND, OR and MIN_OF queries hold subqueries; for any other query type
// a std::runtime_error is thrown.
std::vector<Query> &Query::as_queries() {
    const bool has_subqueries = type == QueryType::AND ||
                                type == QueryType::OR ||
                                type == QueryType::MIN_OF;
    if (!has_subqueries) {
        throw std::runtime_error("This query doesn\'t contain subqueries.");
    }
    return queries;
}

Expand Down
5 changes: 3 additions & 2 deletions libursa/Query.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ class PrimitiveQuery {
PrimitiveQuery(IndexType itype, TriGram trigram)
: itype(itype), trigram(trigram) {}

const IndexType itype;
const TriGram trigram;
IndexType itype;
TriGram trigram;

// We want to use PrimitiveQuery in STL containers, and this means they
// must be comparable using <. Specific order doesn't matter.
Expand Down Expand Up @@ -51,6 +51,7 @@ class Query {
Query &operator=(Query &&) = default;

const std::vector<Query> &as_queries() const;
std::vector<Query> &as_queries();
const QString &as_value() const;
uint32_t as_count() const;
std::string as_string_repr() const;
Expand Down
31 changes: 31 additions & 0 deletions libursa/QueryOptimizer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#include "QueryOptimizer.h"

#include <vector>

// Run the optimization passes on subqueries.
// After this step, every subquery should be maximally optimized,
// so I believe there's no need to run this in a loop.
//
// Consumes `q`: its subqueries are moved out, optimized one by one with
// q_optimize, and a new query of the same type is built from the results.
// For MIN_OF queries the original count is carried over via q_min_of.
// Note that as_queries() throws std::runtime_error for query types that do
// not hold subqueries.
Query simplify_subqueries(Query &&q) {
    // q_optimize ensures QueryType is not PRIMITIVE already
    auto &subqueries = q.as_queries();

    std::vector<Query> newqueries;
    // The final size is known up front, so avoid repeated reallocation.
    newqueries.reserve(subqueries.size());
    for (auto &subquery : subqueries) {
        newqueries.emplace_back(q_optimize(std::move(subquery)));
    }

    if (q.get_type() == QueryType::MIN_OF) {
        return q_min_of(q.as_count(), std::move(newqueries));
    }
    // Return the temporary directly: wrapping a prvalue in std::move would
    // only disable copy elision.
    return Query(q.get_type(), std::move(newqueries));
}

// Entry point of the optimizer: consumes `q` and returns its optimized form.
// Primitive queries are handed back untouched; for every other query the
// subqueries are optimized first.
Query q_optimize(Query &&q) {
    // A primitive query has nothing to improve, so only compound queries
    // go through the passes below.
    if (q.get_type() != QueryType::PRIMITIVE) {
        q = simplify_subqueries(std::move(q));

        // Optimization passes will be added here later.
    }

    // `q` is a named rvalue reference, so an explicit move is needed to
    // avoid copying it into the return value.
    return std::move(q);
}
8 changes: 8 additions & 0 deletions libursa/QueryOptimizer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#pragma once

#include "Query.h"

// Optimizes a query, and returns the optimized version.
// Optimizations try to simplify the expression in various ways to make the
// execution faster - for example by enabling short-circuiting in some places.
//
// The query is taken by rvalue reference and is consumed by the call:
// pass it with std::move() and use only the returned Query afterwards.
Query q_optimize(Query &&query);
5 changes: 3 additions & 2 deletions libursa/Version.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ constexpr std::string_view ursadb_format_version = "1.5.0";

// Project version.
// Consider updating the version tag when doing PRs.
constexpr std::string_view ursadb_version_string =
"@PROJECT_VERSION@+debuglogs";
// clang-format off
constexpr std::string_view ursadb_version_string = "@PROJECT_VERSION@+opt0";
// clang-format on

0 comments on commit 8467f9f

Please sign in to comment.