Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backtracking traverser #126

Draft
wants to merge 14 commits into
base: master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 83 additions & 56 deletions src/exclusion/object_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

#include <algorithm>
#include <functional>
#include <list>
#include <log.hpp>
#include <map>
#include <ostream>
#include <set>
Expand Down Expand Up @@ -37,8 +39,8 @@ class path_trie {

[[nodiscard]] const trie_node *get_child(std::string_view key) const
{
auto it = children_.find(key);
if (it == children_.end()) {
auto it = children.find(key);
if (it == children.end()) {
return nullptr;
}
return &it->second;
Expand All @@ -49,31 +51,28 @@ class path_trie {
std::string_view key, InternString &&intern_str_fun)
{
{
auto it = children_.find(key);
if (it != children_.end()) {
auto it = children.find(key);
if (it != children.end()) {
return {it->second, false};
}
}

auto interned_str = std::forward<InternString>(intern_str_fun)(key);
auto [it, is_new] = children_.emplace(std::piecewise_construct,
auto [it, is_new] = children.emplace(std::piecewise_construct,
std::forward_as_tuple(interned_str), std::forward_as_tuple());
return {std::reference_wrapper{it->second}, true};
}

[[nodiscard]] bool is_terminal() const { return children_.empty(); }
[[nodiscard]] bool is_terminal() const { return children.empty(); }

void clear() { children_.clear(); }

protected:
#ifdef HAS_NONRECURSIVE_UNORDERED_MAP
// unordered_map doesn't allow trie_node as the value of the map
// because trie_node is an incomplete type at this point
template <typename K, typename V> using MapType = std::map<K, V>;
#else
template <typename K, typename V> using MapType = std::unordered_map<K, V>;
#endif
MapType<std::string_view, trie_node> children_{};
MapType<std::string_view, trie_node> children{};
};
static_assert(std::is_move_assignable_v<trie_node>);
static_assert(std::is_move_constructible_v<trie_node>);
Expand All @@ -85,85 +84,113 @@ class path_trie {
public:
enum class state { not_found, found, intermediate_node };

explicit traverser(const trie_node *root)
explicit traverser(trie_node const *root) : cur_node{root} {}

// Builds a traverser positioned at root that also carries backtracking
// state: the globs seen so far (globs) and the keys descended through so
// far (stack). Both containers are moved into the new traverser.
traverser(trie_node const *root, std::list<std::pair<trie_node const *, unsigned>> &&globs,
std::vector<std::string_view> &&stack)
: cur_node{root}, seen_globs(std::move(globs)), key_stack(std::move(stack))
{}

static const trie_node *backtrack(std::string_view next_key,
const std::vector<std::string_view> &stack,
std::list<std::pair<const trie_node *, unsigned>> &globs)
{
if (root != nullptr) {
cur_nodes_.emplace_back(root);
// We have reached this point with a null node, which means
// there is no glob node available, but we still have previously
// seen globs, so we backtrack.
//
// Each seen glob is tried in list order: starting from the glob node,
// the keys recorded in stack from the glob's stored index onwards are
// replayed, and finally next_key itself is looked up. The first glob
// for which the whole replay succeeds yields the returned node; nullptr
// is returned when every glob has been exhausted.
for (auto it = globs.begin(); it != globs.end();) {
    const trie_node *root = it->first;
    for (auto i = it->second; root != nullptr && i < stack.size(); i++) {
        root = root->get_child(stack[i]);
    }

    // The replay loop stops with a null root as soon as one of the
    // recorded keys has no match under this glob. Only look up next_key
    // when the replay succeeded; calling get_child through a null
    // pointer would be undefined behaviour.
    if (root != nullptr) {
        root = root->get_child(next_key);
    }

    // We remove the glob from the list as we're either following it
    // or it's not a valid path
    it = globs.erase(it);

    if (root != nullptr) {
        return root;
    }
}

return nullptr;
}

[[nodiscard]] traverser descend(std::string_view next_key) const
[[nodiscard]] traverser descend_wildcard() const
{
if (get_state() != state::intermediate_node) {
// once found/not_found, as we descend we keep the state
return *this;
}

std::vector<const trie_node *> next_nodes;
next_nodes.reserve(cur_nodes_.size());

for (const auto *cur_node : cur_nodes_) {
const auto *next_node = cur_node->get_child(next_key);
if (next_node != nullptr) {
if (next_node->is_terminal()) {
return traverser{next_node};
}

next_nodes.emplace_back(next_node);
}
const auto *next_node = cur_node->get_child("*");
if (next_node == nullptr && seen_globs.empty()) {
return traverser{nullptr};
}

const auto *glob_node = cur_node->get_child("*");
if (glob_node != nullptr) {
if (glob_node->is_terminal()) {
return traverser{glob_node};
}
auto globs = seen_globs;
if (next_node == nullptr) {
next_node = backtrack("*", key_stack, globs);
}

next_nodes.emplace_back(glob_node);
}
if (next_node == nullptr || globs.empty()) {
return traverser{next_node};
}

return traverser{std::move(next_nodes)};
auto new_stack = key_stack;
new_stack.emplace_back("*");
return {next_node, std::move(globs), std::move(new_stack)};
}

[[nodiscard]] traverser descend_wildcard() const
[[nodiscard]] traverser descend(std::string_view next_key) const
{
if (get_state() != state::intermediate_node) {
// once found/not_found, as we descend we keep the state
return *this;
}

std::vector<const trie_node *> next_nodes;
next_nodes.reserve(cur_nodes_.size());

for (const auto *cur_node : cur_nodes_) {
const auto *glob_node = cur_node->get_child("*");
if (glob_node != nullptr) {
if (glob_node->is_terminal()) {
return traverser{glob_node};
}
const auto *glob_node = cur_node->get_child("*");
const auto *next_node = cur_node->get_child(next_key);
if (next_node == nullptr) {
if (glob_node == nullptr && seen_globs.empty()) {
return traverser{nullptr};
}
next_node = glob_node;
}

next_nodes.emplace_back(glob_node);
auto globs = seen_globs;
if (next_node == nullptr) {
next_node = backtrack(next_key, key_stack, globs);
} else {
// Find the next glob, the depth should be current + 1
if (glob_node != nullptr && glob_node != next_node) {
globs.emplace_front(glob_node, key_stack.size() + 1);
}
}

return traverser{std::move(next_nodes)};
if (next_node == nullptr || globs.empty()) {
return traverser{next_node};
}

auto new_stack = key_stack;
new_stack.emplace_back(next_key);

return {next_node, std::move(globs), std::move(new_stack)};
}

[[nodiscard]] state get_state() const
{
if (cur_nodes_.empty()) {
if (cur_node == nullptr) {
return state::not_found;
}

if (cur_nodes_.size() == 1 && cur_nodes_.back()->is_terminal()) {
return state::found;
}

return state::intermediate_node;
return cur_node->is_terminal() ? state::found : state::intermediate_node;
}

private:
explicit traverser(std::vector<const trie_node *> &&nodes) : cur_nodes_(std::move(nodes)) {}
std::vector<const trie_node *> cur_nodes_;
// Node the traverser is currently positioned at; get_state() reports
// not_found when this is nullptr.
trie_node const *cur_node{};
// Glob ("*") nodes that were available but passed over because a different
// child was followed. Each is paired with the key_stack index from which
// keys are replayed when backtracking to it. New entries are pushed at the
// front, so backtrack() tries the most recently seen glob first.
std::list<std::pair<trie_node const *, unsigned>> seen_globs{};
// Keys descended through so far, in order; only carried forward while
// there are seen globs left to backtrack to.
std::vector<std::string_view> key_stack{};
};

template <typename StringType>
Expand All @@ -190,7 +217,7 @@ class path_trie {
}
if (!last_is_new) {
// already existed. If it had children, make it a terminal node
cur->clear();
cur->children.clear();
}
}

Expand Down
Loading