diff --git a/CMakeLists.txt b/CMakeLists.txt index 1c152977547..4036460a4ce 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1086,6 +1086,7 @@ add_library(libtsutil SHARED lib/ts/InkErrno.cc lib/ts/InkErrno.h lib/ts/IntrusiveDList.h + lib/ts/IntrusiveHashMap.h lib/ts/IntrusivePtrTest.cc lib/ts/IpMap.cc lib/ts/IpMap.h @@ -1161,6 +1162,7 @@ add_executable(test_tslib lib/ts/unit-tests/test_History.cc lib/ts/unit-tests/test_ink_inet.cc lib/ts/unit-tests/test_IntrusiveDList.cc + lib/ts/unit-tests/test_IntrusiveHashMap.cc lib/ts/unit-tests/test_IntrusivePtr.cc lib/ts/unit-tests/test_IpMap.cc lib/ts/unit-tests/test_layout.cc diff --git a/doc/conf.py b/doc/conf.py index 1f09e4228e5..8991ed251e3 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -168,6 +168,7 @@ nitpicky = True nitpick_ignore = [ + ('cpp:identifier', 'F') # required for templated method using 'F' ] # Autolink issue references. diff --git a/doc/developer-guide/internal-libraries/index.en.rst b/doc/developer-guide/internal-libraries/index.en.rst index aae63c8ee6b..bc7d3b913b0 100644 --- a/doc/developer-guide/internal-libraries/index.en.rst +++ b/doc/developer-guide/internal-libraries/index.en.rst @@ -33,4 +33,5 @@ development team. scalar.en buffer-writer.en intrusive-list.en + intrusive-hash-map.en MemArena.en diff --git a/doc/developer-guide/internal-libraries/intrusive-hash-map.en.rst b/doc/developer-guide/internal-libraries/intrusive-hash-map.en.rst new file mode 100644 index 00000000000..6ee14134015 --- /dev/null +++ b/doc/developer-guide/internal-libraries/intrusive-hash-map.en.rst @@ -0,0 +1,197 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file distributed with this work for + additional information regarding copyright ownership. The ASF licenses this file to you under the + Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software distributed under the License + is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + or implied. See the License for the specific language governing permissions and limitations under + the License. + +.. include:: ../../common.defs + +.. _lib-intrusive-hash-map: +.. highlight:: cpp +.. default-domain:: cpp + +IntrusiveHashMap +**************** + +:class:`IntrusiveHashMap` provides a "hash" or "unordered" set, using intrusive links. It provides a +container for elements, each of which has a :arg:`key`. A hash function is applied to a key to +generate a :arg:`hash id` which is used to group the elements in to buckets for fast lookup. This +container is a mix of :code:`std::unordered_set` and :code:`std::unordered_map`. There is no +separation between elements and keys, but each element can contain non-key data. + +Iteration over elements is provided and is constant time. + +In order to optimize lookup, the container can increase the number of buckets used. This is called +the "expansion policy" of the container and it can be automatic or controlled externally. + +Usage +***** + +To use an :class:`IntrusiveHashMap` the element must provide support for the container. This is done +through an associated descriptor class which provides the operations needed to manipulate the elements +in the container. + +Examples +======== + +Details +******* + +.. class:: template < typename H > IntrusiveHashMap + + :tparam H: Element operations. + + An unordered map using a hash function. The properties of the map are determined by types and + operations provided by the descriptor type :arg:`H`. The following types are derived from :arg:`H` + and defined in the container type. + + .. 
type:: value_type + + The type of elements in the container, deduced from the return types of the link accessor methods + in :arg:`H`. + + .. type:: key_type + + The type of the key used for hash computations. Deduced from the return type of the key + accessor. An instance of this type is never default constructed nor modified, therefore it can + be a reference if the key type is expensive to copy. + + .. type:: hash_id + + The type of the hash of a :type:`key_type`. Deduced from the return type of the hash function. + This must be a numeric type. + + :arg:`H` + This describes the hash map, primarily via the operations required for the map. The related types are deduced + from the function return types. This is designed to be compatible with :class:`IntrusiveDList`. + + .. function:: static key_type key_of(value_type * v) + + Key accessor - return the key of the element :arg:`v`. + + .. function:: static hash_id hash_of(key_type key) + + Hash function - compute the hash value of the :arg:`key`. + + .. function:: static bool equal(key_type lhs, key_type rhs) + + Key comparison - two keys are equal if this function returns :code:`true`. + + .. function:: static IntrusiveHashMap::value_type * & next_ptr(IntrusiveHashMap::value_type * v) + + Return a reference to the next element pointer embedded in the element :arg:`v`. + + .. function:: static IntrusiveHashMap::value_type * & prev_ptr(IntrusiveHashMap::value_type * v) + + Return a reference to the previous element pointer embedded in the element :arg:`v`. + + .. type:: iterator + + An STL compliant iterator over elements in the container. + + .. function:: IntrusiveHashMap & insert(value_type * v) + + Insert the element :arg:`v` into the container. + + .. function:: iterator begin() + + Return an iterator to the first element in the container. + + .. function:: iterator end() + + Return an iterator to one past the last element in the container. + + .. 
function:: iterator find(value_type * v) + + Search for :arg:`v` in the container. If found, return an iterator referring to :arg:`v`. If not + return the end iterator. This validates :arg:`v` is in the container. + + .. function:: IntrusiveHashMap & erase(iterator spot) + + Remove the element referred to by :arg:`spot` from the container. + + .. function:: iterator iterator_for(value_type * v) + + Return an iterator for :arg:`v`. This is very fast, faster than :func:`IntrusiveHashMap::find` + but less safe because no validation is done on :arg:`v`. If it is not in the container (either in no + container or a different one) further iteration on the returned iterator will go badly. It is + useful inside range :code:`for` loops when it is guaranteed the element is in the container. + + .. function:: template < typename F > IntrusiveHashMap & apply(F && f) + + :tparam F: A functional type with the signature :code:`void (value_type*)`. + + This applies the function :arg:`f` to every element in the container in such a way that + modification of the element does not interfere with the iteration. The most common use is to + :code:`delete` the elements during cleanup. The common idiom :: + + for ( auto & elt : container) delete &elt; + + is problematic because the iteration links are in the deleted element causing the computation + of the next element to be a use after free. Using :func:`IntrusiveHashMap::apply` enables safe + cleanup. :: + + container.apply([](value_type & v) { delete & v; }); + + Because the links are intrusive it is possible for other classes or the element class to + modify them. In such cases this method provides a safe way to invoke such mechanisms. + +Design Notes +************ + +This is a refresh of a previously existing class, :code:`TSHashTable`. The switch to C++ 11 and then +C++ 17 made it possible to do much better in terms of the internal implementation and API. 
The +overall functionality is roughly the same but with an easier API, compatibility with +:class:`IntrusiveDList`, and better internal implementation. + +The biggest change is that elements are stored in a single global list rather than per hash bucket. +The buckets serve only as entry points into the global list and to count the number of elements +per bucket. This simplifies the implementation of iteration, so that the old :code:`Location` nested +class can be removed. Elements with equal keys can be handled in the same way as with STL +containers, via iterator ranges, instead of a custom pseudo-iterator class. + +Notes on :func:`IntrusiveHashMap::apply` +======================================== + +This was added after some experience with use of the container. Initially it was added to make +cleaning up the container easier. Without it, cleanup looks like :: + + for ( auto spot = map.begin(), limit = map.end() ; spot != limit ; delete &( * spot++)) { + ; // empty + } + +Instead one can do :: + + map.apply([](value_type& v) { delete &v; }); + +The post increment operator guarantees that :arg:`spot` has been updated before the current element is destroyed. +However, it turns out to be handy in other map modifying operations. In the unit tests there is +this code + +.. literalinclude:: ../../../lib/ts/unit-tests/test_IntrusiveHashMap.cc + :lines: 129-132 + +This removes all elements that do not have the payload "dup". As another design note, +:func:`IntrusiveHashMap::iterator_for` here serves to bypass validation checking on the target for +:func:`IntrusiveHashMap::erase`, which is proper because :func:`IntrusiveHashMap::apply` guarantees +:arg:`thing` is in the map. + +Without :code:`apply` this is needed :: + + auto idx = map.begin(); + while (idx != map.end()) { + auto x{idx++}; + if ("dup"sv != x->_payload) { + map.erase(x); + } + } + +The latter is more verbose and more importantly less obvious, depending on a subtle interaction with +post increment. 
diff --git a/iocore/dns/SplitDNS.cc b/iocore/dns/SplitDNS.cc index 6697a6466c4..d98634727d3 100644 --- a/iocore/dns/SplitDNS.cc +++ b/iocore/dns/SplitDNS.cc @@ -148,7 +148,7 @@ SplitDNSConfig::reconfigure() if (nullptr != params->m_DNSSrvrTable->getHostMatcher() && nullptr == params->m_DNSSrvrTable->getReMatcher() && nullptr == params->m_DNSSrvrTable->getIPMatcher() && 4 >= params->m_numEle) { HostLookup *pxHL = params->m_DNSSrvrTable->getHostMatcher()->getHLookup(); - params->m_pxLeafArray = (void *)pxHL->getLArray(); + params->m_pxLeafArray = pxHL->get_leaf_array(); params->m_bEnableFastPath = true; } @@ -236,20 +236,20 @@ SplitDNS::findServer(RequestData *rdata, SplitDNSResult *result) break; } - if (false == pxHL[i].isNot && pxHL[i].len > len) { + if (false == pxHL[i].isNot && static_cast(pxHL[i].match.size()) > len) { continue; } - int idx = len - pxHL[i].len; - char *pH = &pHost[idx]; - char *pMatch = (char *)pxHL[i].match; - char cNot = *pMatch; + int idx = len - pxHL[i].match.size(); + char *pH = &pHost[idx]; + const char *pMatch = pxHL[i].match.data(); + char cNot = *pMatch; if ('!' == cNot) { pMatch++; } - int res = memcmp(pH, pMatch, pxHL[i].len); + int res = memcmp(pH, pMatch, pxHL[i].match.size()); if ((0 != res && '!' == cNot) || (0 == res && '!' 
!= cNot)) { data_ptr = (SplitDNSRecord *)pxHL[i].opaque_data; diff --git a/lib/ts/HostLookup.cc b/lib/ts/HostLookup.cc index 4be5c499ddc..50019ed5242 100644 --- a/lib/ts/HostLookup.cc +++ b/lib/ts/HostLookup.cc @@ -27,58 +27,52 @@ * * ****************************************************************************/ -#include "ts/ink_platform.h" -#include "ts/ink_memory.h" -#include "ts/DynArray.h" +#include +#include +#include #include "ts/ink_inet.h" #include "ts/ink_assert.h" -#include "ts/ink_hash_table.h" -#include "ts/Tokenizer.h" #include "ts/HostLookup.h" -#include "ts/MatcherUtils.h" +#include +#include +using std::string_view; +using ts::TextView; + +namespace +{ // bool domaincmp(const char* hostname, const char* domain) // // Returns true if hostname is in domain // bool -domaincmp(const char *hostname, const char *domain) +domaincmp(string_view hostname, string_view domain) { - ink_assert(hostname != nullptr); - ink_assert(domain != nullptr); - - const char *host_cur = hostname + strlen(hostname); - const char *domain_cur = domain + strlen(domain); - // Check to see if were passed emtpy stings for either // argument. Empty strings do not match anything // - if (domain_cur == domain || host_cur == hostname) { + if (domain.empty() || hostname.empty()) { return false; } - // Go back to the last character - domain_cur--; - host_cur--; + // Walk through both strings backward - explicit declares, need these post-loop. 
+ auto d_idx = domain.rbegin(); + auto h_idx = hostname.rbegin(); // Trailing dots should be ignored since they are optional - // - if (*(domain_cur) == '.') { - domain_cur--; + if (*d_idx == '.') { + ++d_idx; } - if (*(host_cur) == '.') { - host_cur--; + if (*h_idx == '.') { + ++h_idx; } - // Walk through both strings backward - while (domain_cur >= domain && host_cur >= hostname) { - // If we still have characters left on both strings and - // they do not match, matching fails - // - if (tolower(*domain_cur) != tolower(*host_cur)) { + while (d_idx != domain.rend() && h_idx != hostname.rend()) { + // If we still have characters left on both strings and they do not match, matching fails + if (tolower(*d_idx) != tolower(*h_idx)) { return false; } - domain_cur--; - host_cur--; + ++d_idx; + ++h_idx; }; // There are three possible cases that could have gotten us @@ -88,41 +82,22 @@ domaincmp(const char *hostname, const char *domain) // Case 2: we ran out of domain but not hostname // Case 3: we ran out of hostname but not domain // - if (domain_cur < domain) { - if (host_cur < hostname) { - // This covers the case 1 - // ex: example.com matching example.com - return true; - } else { - // This covers case 2 (the most common case): - // ex: www.example.com matching .com or com - // But we must check that we do match - // www.inktomi.ecom against com - // - if (*(domain_cur + 1) == '.') { - return true; - } else if (*host_cur == '.') { - return true; - } else { - return false; - } - } - } else if (host_cur < hostname) { + if (d_idx == domain.rend()) { + // If end of hostname also, then case 1 match. + // Otherwise it's a case 2 match iff last character match was at a domain boundary. + // (ex: avoid 'www.inktomi.ecom' matching 'com') + // note: d_idx[-1] == '.' --> h_idx[-1] == '.' because of match check in loop. + return h_idx == hostname.rend() || *h_idx == '.' 
|| *(d_idx - 1) == '.'; + } else if (h_idx == hostname.rend()) { // This covers the case 3 (a very unusual case) // ex: example.com needing to match .example.com - if (*domain_cur == '.' && domain_cur == domain) { - return true; - } else { - return false; - } + return *d_idx == '.' && d_idx + 1 == domain.rend(); } ink_assert(!"Should not get here"); return false; } -// int hostcmp(const char* c1, const char* c2) -// // Similar to strcasecmp except that if one string has a // trailing '.' and the other one does not // then they are equal @@ -133,33 +108,37 @@ domaincmp(const char *hostname, const char *domain) // since the trailing dot is optional // int -hostcmp(const char *c1, const char *c2) +hostcmp(string_view lhs, string_view rhs) { - ink_assert(c1 != nullptr); - ink_assert(c2 != nullptr); - do { - if (tolower(*c1) < tolower(*c2)) { - if (*c1 == '\0' && *c2 == '.' && *(c2 + 1) == '\0') { - break; - } + ink_assert(!lhs.empty()); + ink_assert(!rhs.empty()); + + // ignore any trailing . + if (lhs.back() == '.') { + lhs.remove_suffix(1); + } + if (rhs.back() == '.') { + rhs.remove_suffix(1); + } + + auto lidx = lhs.begin(); + auto ridx = rhs.begin(); + while (lidx != lhs.end() && ridx != rhs.end()) { + char lc(tolower(*lidx)); + char rc(tolower(*ridx)); + if (lc < rc) { return -1; - } else if (tolower(*c1) > tolower(*c2)) { - if (*c2 == '\0' && *c1 == '.' && *(c1 + 1) == '\0') { - break; - } + } else if (lc > rc) { return 1; } - - if (*c1 == '\0') { - break; - } - c1++; - c2++; - } while (true); - - return 0; + ++lidx; + ++ridx; + } + return lidx != lhs.end() ? 1 : ridx != rhs.end() ? -1 : 0; } +} // namespace + // static const unsigned char asciiToTable[256] // // Used to Map Legal hostname characters into @@ -200,74 +179,19 @@ static const unsigned char asciiToTable[256] = { // Number of legal characters in the acssiToTable array static const int numLegalChars = 38; -// struct charIndex_el -// -// Used by class charIndex. 
Forms a single level -// in charIndex tree -// -struct charIndex_el { - charIndex_el(); - ~charIndex_el(); - HostBranch *branch_array[numLegalChars]; - charIndex_el *next_level[numLegalChars]; -}; - -charIndex_el::charIndex_el() -{ - memset(branch_array, 0, sizeof(branch_array)); - memset(next_level, 0, sizeof(next_level)); -} - -charIndex_el::~charIndex_el() -{ - int i; - - // Recursively delete all the lower levels of the - // data structure - for (i = 0; i < numLegalChars; i++) { - if (next_level[i] != nullptr) { - delete next_level[i]; - } - } -} - -// struct charIndexIterInternal -// -// An internal struct to charIndexIterState -// Stores the location of an element in -// class charIndex -// -struct charIndexIterInternal { - charIndex_el *ptr; - int index; -}; - -// Used as a default return element for DynArray in -// struct charIndexIterState -static charIndexIterInternal default_iter = {nullptr, -1}; - -// struct charIndexIterState +// struct CharIndexBlock // -// struct for the callee to keep interation state -// for class charIndex +// Used by class CharIndex. 
Forms a single level in CharIndex tree // -struct charIndexIterState { - charIndexIterState(); - - // Where that level we are in interation - int cur_level; - - // Where we got the last element from - int cur_index; - charIndex_el *cur_el; - - // Queue of the above levels - DynArray q; +struct CharIndexBlock { + struct Item { + HostBranch *branch{nullptr}; + std::unique_ptr block; + }; + std::array array; }; -charIndexIterState::charIndexIterState() : cur_level(-1), cur_index(-1), cur_el(nullptr), q(&default_iter, 6) {} - -// class charIndex - A constant time string matcher intended for +// class CharIndex - A constant time string matcher intended for // short strings in a sparsely populated DNS paritition // // Creates a look up table for character in data string @@ -282,11 +206,11 @@ charIndexIterState::charIndexIterState() : cur_level(-1), cur_index(-1), cur_el( // Example: com // c maps to 13, o maps to 25, m maps to 23 // -// charIndex_el charIndex_el +// CharIndexBlock CharIndexBlock // ----------- ------------ // 0 | | | | | | // . | | | | | | -// charIndex_el . | | | | | | +// CharIndexBlock . | | | | | | // ---------- . | | | | | | // 0 | | | . | | | |-->23| ptr| 0 | (ptr is to the // . | | | |-------->25| 0 | -----| | | | hostBranch for @@ -303,478 +227,365 @@ charIndexIterState::charIndexIterState() : cur_level(-1), cur_index(-1), cur_el( // // // -class charIndex +class CharIndex { public: - charIndex(); - ~charIndex(); - void Insert(const char *match_data, HostBranch *toInsert); - HostBranch *Lookup(const char *match_data); - HostBranch *iter_first(charIndexIterState *s); - HostBranch *iter_next(charIndexIterState *s); + struct iterator : public std::iterator { + using self_type = iterator; + + struct State { + int index{-1}; + CharIndexBlock *block{nullptr}; + }; + + iterator() { q.reserve(HOST_TABLE_DEPTH * 2); } // was 6, guessing that was twice the table depth. 
+ + value_type *operator->(); + value_type &operator*(); + bool operator==(self_type const &that) const; + bool operator!=(self_type const &that) const; + self_type &operator++(); + + // Current level. + int cur_level{-1}; + + // Where we got the last element from + State state; + + // Queue of the above levels + std::vector q; + + // internal methods + self_type &advance(); + }; + + ~CharIndex(); + void Insert(string_view match_data, HostBranch *toInsert); + HostBranch *Lookup(string_view match_data); + + iterator begin(); + iterator end(); private: - charIndex_el *root; - InkHashTable *illegalKey; + CharIndexBlock root; + using Table = ts::IntrusiveHashMap; + std::unique_ptr illegalKey; }; -charIndex::charIndex() : illegalKey(nullptr) +CharIndex::~CharIndex() { - root = new charIndex_el; -} - -charIndex::~charIndex() -{ - InkHashTableIteratorState ht_iter; - InkHashTableEntry *ht_entry = nullptr; - HostBranch *tmp; - - delete root; - - // Destroy the illegalKey hashtable if there is one and free - // up all of its values - if (illegalKey != nullptr) { - ht_entry = ink_hash_table_iterator_first(illegalKey, &ht_iter); - - while (ht_entry != nullptr) { - tmp = (HostBranch *)ink_hash_table_entry_value(illegalKey, ht_entry); - ink_assert(tmp != nullptr); - delete tmp; - ht_entry = ink_hash_table_iterator_next(illegalKey, &ht_iter); - } - ink_hash_table_destroy(illegalKey); + // clean up the illegal key table. 
+ if (illegalKey) { + for (auto spot = illegalKey->begin(), limit = illegalKey->end(); spot != limit; delete &*(spot++)) + ; // empty } } -// void charIndex::Insert(const char* match_data, HostBranch* toInsert) +// void CharIndex::Insert(const char* match_data, HostBranch* toInsert) // // Places a binding for match_data to toInsert into the index // void -charIndex::Insert(const char *match_data, HostBranch *toInsert) +CharIndex::Insert(string_view match_data, HostBranch *toInsert) { unsigned char index; - const char *match_start = match_data; - charIndex_el *cur = root; - charIndex_el *next; + CharIndexBlock *cur = &root; - if (*match_data == '\0') { - // Should not happen - ink_assert(0); - return; - } + ink_assert(!match_data.empty()); - while (true) { - index = asciiToTable[(unsigned char)(*match_data)]; - - // Check to see if our index into table is for an - // 'illegal' DNS character - if (index == 255) { - // Insert into illgals hash table - if (illegalKey == nullptr) { - illegalKey = ink_hash_table_create(InkHashTableKeyType_String); - } - ink_hash_table_insert(illegalKey, (char *)match_start, toInsert); - break; + if (std::any_of(match_data.begin(), match_data.end(), [](unsigned char c) { return asciiToTable[c] == 255; })) { + // Insert into illegals hash table + if (illegalKey == nullptr) { + illegalKey.reset(new Table); } + toInsert->key = match_data; + illegalKey->insert(toInsert); + } else { + while (true) { + index = asciiToTable[static_cast(match_data.front())]; + + // Check to see if are at the level we supposed be at + if (match_data.size() == 1) { + // The slot should always be emtpy, no duplicate keys are allowed + ink_assert(cur->array[index].branch == nullptr); + cur->array[index].branch = toInsert; + break; + } else { + // We need to find the next level in the table - // Check to see if are at the level we supposed be at - if (*(match_data + 1) == '\0') { - // The slot should always be emtpy, no duplicate - // keys are allowed - 
ink_assert(cur->branch_array[index] == nullptr); - cur->branch_array[index] = toInsert; - break; - } else { - // We need to find the next level in the table - - next = cur->next_level[index]; + CharIndexBlock *next = cur->array[index].block.get(); - // Check to see if we need to expand the table - if (next == nullptr) { - next = new charIndex_el; - cur->next_level[index] = next; + // Check to see if we need to expand the table + if (next == nullptr) { + next = new CharIndexBlock; + cur->array[index].block.reset(next); + } + cur = next; } - cur = next; + match_data.remove_prefix(1); } - match_data++; } } -// HostBranch* charIndex::Lookup(const char* match_data) +// HostBranch* CharIndex::Lookup(const char* match_data) // -// Searches the charIndex on key match_data +// Searches the CharIndex on key match_data // If there is a binding for match_data, returns a pointer to it // otherwise a nullptr pointer is returned // HostBranch * -charIndex::Lookup(const char *match_data) +CharIndex::Lookup(string_view match_data) { - unsigned char index; - charIndex_el *cur = root; - void *hash_lookup; - const char *match_start = match_data; - - if (root == nullptr || *match_data == '\0') { + if (match_data.empty()) { return nullptr; } - while (true) { - index = asciiToTable[(unsigned char)(*match_data)]; - - // Check to see if our index into table is for an - // 'illegal' DNS character - if (index == 255) { - if (illegalKey == nullptr) { - return nullptr; - } else { - if (ink_hash_table_lookup(illegalKey, (char *)match_start, &hash_lookup)) { - return (HostBranch *)hash_lookup; - } else { - return nullptr; - } + if (std::any_of(match_data.begin(), match_data.end(), [](unsigned char c) { return asciiToTable[c] == 255; })) { + if (illegalKey) { + auto spot = illegalKey->find(match_data); + if (spot != illegalKey->end()) { + return &*spot; } } + return nullptr; + } + + // Invariant: No invalid characters. 
+ CharIndexBlock *cur = &root; + while (cur) { + unsigned char index = asciiToTable[static_cast(match_data.front())]; + // Check to see if we are looking for the next level or // a HostBranch - if (*(match_data + 1) == '\0') { - return cur->branch_array[index]; + if (match_data.size() == 1) { + return cur->array[index].branch; } else { - cur = cur->next_level[index]; - - if (cur == nullptr) { - return nullptr; - } + cur = cur->array[index].block.get(); } + match_data.remove_prefix(1); + } + return nullptr; +} - match_data++; +auto +CharIndex::begin() -> iterator +{ + iterator zret; + zret.state.block = &root; + zret.state.index = 0; + zret.cur_level = 0; + if (root.array[0].branch == nullptr) { + zret.advance(); } + return zret; } -// -// HostBranch* charIndex::iter_next(charIndexIterState* s) -// -// Initialize iterator state and returns the first element -// found in the charTable. If none is found, nullptr -// is returned -// -HostBranch * -charIndex::iter_first(charIndexIterState *s) +auto +CharIndex::end() -> iterator +{ + return {}; +} + +auto CharIndex::iterator::operator-> () -> value_type * { - s->cur_level = 0; - s->cur_index = -1; - s->cur_el = root; + ink_assert(state.block != nullptr); // clang! + return state.block->array[state.index].branch; +} - return iter_next(s); +auto CharIndex::iterator::operator*() -> value_type & +{ + ink_assert(state.block != nullptr); // clang! + return *(state.block->array[state.index].branch); } // -// HostBranch* charIndex::iter_next(charIndexIterState* s) +// HostBranch* CharIndex::iter_next(CharIndexIterState* s) // // Finds the next element in the char index and returns // a pointer to it. 
If there are no more elements, nullptr // is returned // -HostBranch * -charIndex::iter_next(charIndexIterState *s) +auto +CharIndex::iterator::advance() -> self_type & { - int index; - charIndex_el *current_el = s->cur_el; - intptr_t level = s->cur_level; - charIndexIterInternal stored_state; - HostBranch *r = nullptr; - bool first_element; - - // bool first_element is used to tell if first elemente - // pointed to by s has already been returned to caller - // it has unless we are being called from iter_first - if (s->cur_index < 0) { - first_element = false; - index = s->cur_index + 1; - } else { - first_element = true; - index = s->cur_index; - } - - while (true) { + bool check_branch_p{false}; // skip local branch on the first loop. + do { // Check to see if we need to go back up a level - if (index >= numLegalChars) { - if (level <= 0) { - // No more levels so bail out + if (state.index >= numLegalChars) { + if (cur_level <= 0) { // No more levels so bail out + state.block = nullptr; // carefully make this @c equal to the end iterator. + state.index = -1; break; - } else { - // Go back up to a stored level - stored_state = s->q[level - 1]; - ink_assert(stored_state.ptr != nullptr); - ink_assert(stored_state.index >= 0); - level--; - current_el = stored_state.ptr; - index = stored_state.index + 1; + } else { // Go back up to a stored level + state = q[--cur_level]; + ++state.index; // did that one before descending. } + } else if (check_branch_p && state.block->array[state.index].branch != nullptr) { + // Note: we check for a branch on this level before a descending a level so that when we come back up + // this level will be done with this index. 
+ break; + } else if (state.block->array[state.index].block != nullptr) { + // There is a lower level block to iterate over, store our current state and descend + q[cur_level++] = state; + state.block = state.block->array[state.index].block.get(); + state.index = 0; } else { - // Check to see if there is something to return - // - // Note: we check for something to return before a descending - // a level so that when we come back up we will - // be done with this index - // - if (current_el->branch_array[index] != nullptr && first_element == false) { - r = current_el->branch_array[index]; - s->cur_level = level; - s->cur_index = index; - s->cur_el = current_el; - break; - } else if (current_el->next_level[index] != nullptr) { - // There is a lower level to iterate over, store our - // current state and descend - stored_state.ptr = current_el; - stored_state.index = index; - s->q(level) = stored_state; - current_el = current_el->next_level[index]; - index = 0; - level++; - } else { - // Nothing here so advance to next index - index++; - } + ++state.index; } - first_element = false; - } + check_branch_p = true; + } while (true); + return *this; +} - return r; +auto +CharIndex::iterator::operator++() -> self_type & +{ + return this->advance(); } -// class hostArray +bool +CharIndex::iterator::operator==(const self_type &that) const +{ + return this->state.block == that.state.block && this->state.index == that.state.index; +} + +bool +CharIndex::iterator::operator!=(const self_type &that) const +{ + return !(*this == that); +} + +// class HostArray // // Is a fixed size array for holding HostBrach* // Allows only sequential access to data // -// Since the only iter state is an index into the -// array typedef it -using hostArrayIterState = int; - -class hostArray +class HostArray { + /// Element of the @c HostArray. + struct Item { + HostBranch *branch{nullptr}; ///< Next branch. + std::string match_data; ///< Match data for that branch. 
+ }; + using Array = std::array; + public: - hostArray(); - ~hostArray(); - bool Insert(const char *match_data_in, HostBranch *toInsert); - HostBranch *Lookup(const char *match_data_in, bool bNotProcess); - HostBranch *iter_first(hostArrayIterState *s, char **key = nullptr); - HostBranch *iter_next(hostArrayIterState *s, char **key = nullptr); + bool Insert(string_view match_data_in, HostBranch *toInsert); + HostBranch *Lookup(string_view match_data_in, bool bNotProcess); + + Array::iterator + begin() + { + return array.begin(); + } + Array::iterator + end() + { + return array.begin() + _size; + } private: - int num_el; // number of elements currently in the array - HostBranch *branch_array[HOST_ARRAY_MAX]; - char *match_data[HOST_ARRAY_MAX]; + int _size{0}; // number of elements currently in the array + Array array; }; -hostArray::hostArray() : num_el(0) -{ - memset(branch_array, 0, sizeof(branch_array)); - memset(match_data, 0, sizeof(match_data)); -} - -hostArray::~hostArray() -{ - for (int i = 0; i < num_el; i++) { - ink_assert(branch_array[i] != nullptr); - ink_assert(match_data[i] != nullptr); - ats_free(match_data[i]); - } -} - -// bool hostArray::Insert(const char* match_data_in, HostBranch* toInsert) +// bool HostArray::Insert(const char* match_data_in, HostBranch* toInsert) // // Places toInsert into the array with key match_data if there // is adequate space, in which case true is returned // If space is inadequate, false is returned and nothing is inserted // bool -hostArray::Insert(const char *match_data_in, HostBranch *toInsert) +HostArray::Insert(string_view match_data, HostBranch *toInsert) { - if (num_el >= HOST_ARRAY_MAX) { - return false; - } else { - branch_array[num_el] = toInsert; - match_data[num_el] = ats_strdup(match_data_in); - num_el++; + if (_size < static_cast(array.size())) { + array[_size].branch = toInsert; + array[_size].match_data = match_data; + ++_size; return true; } + return false; } -// HostBranch* hostArray::Lookup(const 
char* match_data_in) +// HostBranch* HostArray::Lookup(const char* match_data_in) // // Looks for key match_data_in. If a binding is found, // returns HostBranch* found to the key, otherwise // nullptr is returned // HostBranch * -hostArray::Lookup(const char *match_data_in, bool bNotProcess) +HostArray::Lookup(string_view match_data_in, bool bNotProcess) { HostBranch *r = nullptr; - char *pMD; + string_view pMD; - for (int i = 0; i < num_el; i++) { - pMD = match_data[i]; + for (int i = 0; i < _size; i++) { + pMD = array[i].match_data; - if (bNotProcess && '!' == *pMD) { - char *cp = ++pMD; - if ('\0' == *cp) { + if (bNotProcess && '!' == pMD.front()) { + pMD.remove_prefix(1); + if (pMD.empty()) { continue; } - if (strcmp(cp, match_data_in) != 0) { - r = branch_array[i]; - } else { - continue; + if (pMD == match_data_in) { + r = array[i].branch; } - } - - else if (strcmp(match_data[i], match_data_in) == 0) { - r = branch_array[i]; + } else if (pMD == match_data_in) { + r = array[i].branch; break; } } return r; } -// HostBranch* hostArray::iter_first(hostArrayIterState* s) { -// -// Initilizes s and returns the first element or -// nullptr if no elements exist -// -HostBranch * -hostArray::iter_first(hostArrayIterState *s, char **key) -{ - *s = -1; - return iter_next(s, key); -} - -// HostBranch* hostArray::iter_next(hostArrayIterState* s) { -// -// Returns the next element in the hostArray or -// nullptr if none exist -// -HostBranch * -hostArray::iter_next(hostArrayIterState *s, char **key) -{ - (*s)++; - - if (*s < num_el) { - if (key != nullptr) { - *key = match_data[*s]; - } - return branch_array[*s]; - } else { - return nullptr; - } -} - // maps enum LeafType to strings const char *LeafTypeStr[] = {"Leaf Invalid", "Host (Partial)", "Host (Full)", "Domain (Full)", "Domain (Partial)"}; -static int negative_one = -1; - -HostBranch::HostBranch() : level(0), type(HOST_TERMINAL), next_level(nullptr), leaf_indexs(&negative_one, 1) {} - // HostBranch::~HostBranch() // 
// Recursive delete all host branches below us // HostBranch::~HostBranch() { - // Hash Iteration - InkHashTable *ht; - InkHashTableIteratorState ht_iter; - InkHashTableEntry *ht_entry = nullptr; - - // charIndex Iteration - charIndexIterState ci_iter; - charIndex *ci; - - // hostArray Iteration - hostArray *ha; - hostArrayIterState ha_iter; - - HostBranch *lower_branch; - switch (type) { case HOST_TERMINAL: - ink_assert(next_level == nullptr); break; - case HOST_HASH: - ink_assert(next_level != nullptr); - ht = (InkHashTable *)next_level; - ht_entry = ink_hash_table_iterator_first(ht, &ht_iter); - - while (ht_entry != nullptr) { - lower_branch = (HostBranch *)ink_hash_table_entry_value(ht, ht_entry); - delete lower_branch; - ht_entry = ink_hash_table_iterator_next(ht, &ht_iter); - } - ink_hash_table_destroy(ht); - break; - case HOST_INDEX: - ink_assert(next_level != nullptr); - ci = (charIndex *)next_level; - lower_branch = ci->iter_first(&ci_iter); - while (lower_branch != nullptr) { - delete lower_branch; - lower_branch = ci->iter_next(&ci_iter); + case HOST_HASH: { + HostTable *ht = next_level._table; + for (auto spot = ht->begin(), limit = ht->end(); spot != limit; delete &*(spot++)) { + } // empty + delete ht; + } break; + case HOST_INDEX: { + CharIndex *ci = next_level._index; + for (auto &branch : *ci) { + delete &branch; } delete ci; - break; + } break; case HOST_ARRAY: - ink_assert(next_level != nullptr); - ha = (hostArray *)next_level; - lower_branch = ha->iter_first(&ha_iter); - while (lower_branch != nullptr) { - delete lower_branch; - lower_branch = ha->iter_next(&ha_iter); + for (auto &item : *next_level._array) { + delete item.branch; } - delete ha; + delete next_level._array; break; } } -HostLookup::HostLookup(const char *name) : leaf_array(nullptr), array_len(-1), num_el(-1), matcher_name(name) -{ - root = new HostBranch; - root->level = 0; - root->type = HOST_TERMINAL; - root->next_level = nullptr; -} - -HostLookup::~HostLookup() -{ - if 
(leaf_array != nullptr) { - // Free up the match strings - for (int i = 0; i < num_el; i++) { - ats_free(leaf_array[i].match); - } - delete[] leaf_array; - } - - delete root; -} - -static void -empty_print_fn(void * /* opaque_data ATS_UNUSED */) -{ -} +HostLookup::HostLookup(string_view name) : matcher_name(name) {} void HostLookup::Print() { - Print(empty_print_fn); + Print([](void *) -> void {}); } void -HostLookup::Print(HostLookupPrintFunc f) +HostLookup::Print(PrintFunc const &f) { - PrintHostBranch(root, f); + PrintHostBranch(&root, f); } // @@ -784,62 +595,31 @@ HostLookup::Print(HostLookupPrintFunc f) // and print out each element // void -HostLookup::PrintHostBranch(HostBranch *hb, HostLookupPrintFunc f) +HostLookup::PrintHostBranch(HostBranch *hb, PrintFunc const &f) { - // Hash iteration - InkHashTable *ht; - InkHashTableIteratorState ht_iter; - InkHashTableEntry *ht_entry = nullptr; - - // charIndex Iteration - charIndexIterState ci_iter; - charIndex *ci; - - // hostArray Iteration - hostArray *h_array; - hostArrayIterState ha_iter; - - HostBranch *lower_branch; - intptr_t curIndex; - intptr_t i; // Loop var - - for (i = 0; i < hb->leaf_indexs.length(); i++) { - curIndex = hb->leaf_indexs[i]; - printf("\t\t%s for %s\n", LeafTypeStr[leaf_array[curIndex].type], leaf_array[curIndex].match); + for (auto curIndex : hb->leaf_indices) { + auto &leaf{leaf_array[curIndex]}; + printf("\t\t%s for %.*s\n", LeafTypeStr[leaf.type], static_cast(leaf.match.size()), leaf.match.data()); f(leaf_array[curIndex].opaque_data); } switch (hb->type) { - case HOST_TERMINAL: - ink_assert(hb->next_level == nullptr); + case HostBranch::HOST_TERMINAL: + ink_assert(hb->next_level._ptr == nullptr); break; - case HOST_HASH: - ink_assert(hb->next_level != nullptr); - ht = (InkHashTable *)hb->next_level; - ht_entry = ink_hash_table_iterator_first(ht, &ht_iter); - - while (ht_entry != nullptr) { - lower_branch = (HostBranch *)ink_hash_table_entry_value(ht, ht_entry); - 
PrintHostBranch(lower_branch, f); - ht_entry = ink_hash_table_iterator_next(ht, &ht_iter); + case HostBranch::HOST_HASH: + for (auto &branch : *(hb->next_level._table)) { + PrintHostBranch(&branch, f); } break; - case HOST_INDEX: - ink_assert(hb->next_level != nullptr); - ci = (charIndex *)hb->next_level; - lower_branch = ci->iter_first(&ci_iter); - while (lower_branch != nullptr) { - PrintHostBranch(lower_branch, f); - lower_branch = ci->iter_next(&ci_iter); + case HostBranch::HOST_INDEX: + for (auto &branch : *(hb->next_level._index)) { + PrintHostBranch(&branch, f); } break; - case HOST_ARRAY: - ink_assert(hb->next_level != nullptr); - h_array = (hostArray *)hb->next_level; - lower_branch = h_array->iter_first(&ha_iter); - while (lower_branch != nullptr) { - PrintHostBranch(lower_branch, f); - lower_branch = h_array->iter_next(&ha_iter); + case HostBranch::HOST_ARRAY: + for (auto &item : *(hb->next_level._array)) { + PrintHostBranch(item.branch, f); } break; } @@ -854,23 +634,18 @@ HostLookup::PrintHostBranch(HostBranch *hb, HostLookupPrintFunc f) // HostBranch // HostBranch * -HostLookup::TableNewLevel(HostBranch *from, const char *level_data) +HostLookup::TableNewLevel(HostBranch *from, string_view level_data) { - hostArray *new_ha_table; - charIndex *new_ci_table; + ink_assert(from->type == HostBranch::HOST_TERMINAL); - ink_assert(from->type == HOST_TERMINAL); - - // Use the charIndex for high speed matching at the first level of + // Use the CharIndex for high speed matching at the first level of // the table. 
The first level is short strings, ie: com, edu, jp, fr - if (from->level == 0) { - new_ci_table = new charIndex; - from->type = HOST_INDEX; - from->next_level = new_ci_table; + if (from->level_idx == 0) { + from->type = HostBranch::HOST_INDEX; + from->next_level._index = new CharIndex; } else { - new_ha_table = new hostArray; - from->type = HOST_ARRAY; - from->next_level = new_ha_table; + from->type = HostBranch::HOST_ARRAY; + from->next_level._array = new HostArray; } return InsertBranch(from, level_data); @@ -885,56 +660,40 @@ HostLookup::TableNewLevel(HostBranch *from, const char *level_data) // by class HostMatcher // HostBranch * -HostLookup::InsertBranch(HostBranch *insert_in, const char *level_data) +HostLookup::InsertBranch(HostBranch *insert_in, string_view level_data) { - // Variables for moving an array into a hash table after it - // gets too big - // - hostArray *ha; - hostArrayIterState ha_iter; - HostBranch *tmp; - char *key = nullptr; - InkHashTable *new_ht; - HostBranch *new_branch = new HostBranch; - new_branch->type = HOST_TERMINAL; - new_branch->level = insert_in->level + 1; - new_branch->next_level = nullptr; + new_branch->key = level_data; + new_branch->type = HostBranch::HOST_TERMINAL; + new_branch->level_idx = insert_in->level_idx + 1; switch (insert_in->type) { - case HOST_TERMINAL: + case HostBranch::HOST_TERMINAL: // Should not happen ink_release_assert(0); break; - case HOST_HASH: - ink_hash_table_insert((InkHashTable *)insert_in->next_level, (char *)level_data, new_branch); + case HostBranch::HOST_HASH: + insert_in->next_level._table->insert(new_branch); break; - case HOST_INDEX: - ((charIndex *)insert_in->next_level)->Insert(level_data, new_branch); + case HostBranch::HOST_INDEX: + insert_in->next_level._index->Insert(level_data, new_branch); break; - case HOST_ARRAY: - if (((hostArray *)insert_in->next_level)->Insert(level_data, new_branch) == false) { + case HostBranch::HOST_ARRAY: { + auto array = insert_in->next_level._array; + if 
(array->Insert(level_data, new_branch) == false) { // The array is out of space, time to move to a hash table - ha = (hostArray *)insert_in->next_level; - new_ht = ink_hash_table_create(InkHashTableKeyType_String); - ink_hash_table_insert(new_ht, (char *)level_data, new_branch); - - // Iterate through the existing elements in the array and - // stuff them into the hash table - tmp = ha->iter_first(&ha_iter, &key); - ink_assert(tmp != nullptr); - while (tmp != nullptr) { - ink_assert(key != nullptr); - ink_hash_table_insert(new_ht, key, tmp); - tmp = ha->iter_next(&ha_iter, &key); + auto ha = insert_in->next_level._array; + auto ht = new HostTable; + ht->insert(new_branch); + for (auto &item : *array) { + ht->insert(item.branch); } - // Ring out the old, ring in the new delete ha; - insert_in->next_level = new_ht; - insert_in->type = HOST_HASH; + insert_in->next_level._table = ht; + insert_in->type = HostBranch::HOST_HASH; } - break; + } break; } return new_branch; @@ -949,37 +708,25 @@ HostLookup::InsertBranch(HostBranch *insert_in, const char *level_data) // otherwise returns nullptr // HostBranch * -HostLookup::FindNextLevel(HostBranch *from, const char *level_data, bool bNotProcess) +HostLookup::FindNextLevel(HostBranch *from, string_view level_data, bool bNotProcess) { HostBranch *r = nullptr; - InkHashTable *hash; - charIndex *ci_table; - hostArray *ha_table; - void *lookup; switch (from->type) { - case HOST_TERMINAL: + case HostBranch::HOST_TERMINAL: // Should not happen ink_assert(0); - return nullptr; - case HOST_HASH: - hash = (InkHashTable *)from->next_level; - ink_assert(hash != nullptr); - if (ink_hash_table_lookup(hash, (char *)level_data, &lookup)) { - r = (HostBranch *)lookup; - } else { - r = nullptr; - } break; - case HOST_INDEX: - ci_table = (charIndex *)from->next_level; - ink_assert(ci_table != nullptr); - r = ci_table->Lookup(level_data); + case HostBranch::HOST_HASH: { + auto table = from->next_level._table; + auto spot = 
table->find(level_data); + r = spot == table->end() ? nullptr : &*spot; + } break; + case HostBranch::HOST_INDEX: + r = from->next_level._index->Lookup(level_data); break; - case HOST_ARRAY: - ha_table = (hostArray *)from->next_level; - ink_assert(ha_table != nullptr); - r = ha_table->Lookup(level_data, bNotProcess); + case HostBranch::HOST_ARRAY: + r = from->next_level._array->Lookup(level_data, bNotProcess); break; } return r; @@ -993,34 +740,25 @@ HostLookup::FindNextLevel(HostBranch *from, const char *level_data, bool bNotPro // the elements corresponding to match_data // void -HostLookup::TableInsert(const char *match_data, int index, bool domain_record) +HostLookup::TableInsert(string_view match_data, int index, bool domain_record) { - HostBranch *cur = this->root; + HostBranch *cur = &root; HostBranch *next; - char *match_copy = ats_strdup(match_data); - Tokenizer match_tok("."); - int numTok; - int i; - - LowerCaseStr(match_copy); - numTok = match_tok.Initialize(match_copy, SHARE_TOKS); + TextView match{match_data}; // Traverse down the search structure until we either // Get beyond the fixed number depth of the host table // OR We reach the level where the match stops // - for (i = 0; i < HOST_TABLE_DEPTH; i++) { - // Check to see we need to stop at the current level - if (numTok == cur->level) { - break; - } + for (int i = 0; !match.rtrim('.').empty() && i < HOST_TABLE_DEPTH; ++i) { + TextView token{match.take_suffix_at('.')}; - if (cur->next_level == nullptr) { - cur = TableNewLevel(cur, match_tok[numTok - i - 1]); + if (cur->next_level._ptr == nullptr) { + cur = TableNewLevel(cur, token); } else { - next = FindNextLevel(cur, match_tok[numTok - i - 1]); + next = FindNextLevel(cur, token); if (next == nullptr) { - cur = InsertBranch(cur, match_tok[numTok - i - 1]); + cur = InsertBranch(cur, token); } else { cur = next; } @@ -1044,76 +782,71 @@ HostLookup::TableInsert(const char *match_data, int index, bool domain_record) // it had too elements. 
A comparison must be done at the // leaf node to make sure we have a match if (domain_record == false) { - if (numTok > HOST_TABLE_DEPTH) { - leaf_array[index].type = HOST_PARTIAL; + if (match.empty()) { + leaf_array[index].type = HostLeaf::HOST_PARTIAL; } else { - leaf_array[index].type = HOST_COMPLETE; + leaf_array[index].type = HostLeaf::HOST_COMPLETE; } } else { - if (numTok > HOST_TABLE_DEPTH) { - leaf_array[index].type = DOMAIN_PARTIAL; + if (match.empty()) { + leaf_array[index].type = HostLeaf::DOMAIN_PARTIAL; } else { - leaf_array[index].type = DOMAIN_COMPLETE; + leaf_array[index].type = HostLeaf::DOMAIN_COMPLETE; } } // Place the index in to leaf array into the match list for this // HOST_BRANCH - cur->leaf_indexs(cur->leaf_indexs.length()) = index; - - ats_free(match_copy); + cur->leaf_indices.push_back(index); } // bool HostLookup::MatchArray(HostLookupState* s, void**opaque_ptr, DynArray& array, // bool host_done) // -// Helper function to iterate throught arg array and update Result -// for each element in arg array +// Helper function to iterate through arg array and update Result for each element in arg array // -// host_done should be passed as true if this call represents the all fields -// in the matched against hostname being consumed. Example: for www.example.com -// this would be true for the call matching against the "www", but -// neither of the prior two fields, "inktomi" and "com" +// host_done should be passed as true if this call represents the all fields in the matched against hostname being +// consumed. 
Example: for www.example.com this would be true for the call matching against the "www", but neither of +// the prior two fields, "inktomi" and "com" // bool -HostLookup::MatchArray(HostLookupState *s, void **opaque_ptr, DynArray &array, bool host_done) +HostLookup::MatchArray(HostLookupState *s, void **opaque_ptr, LeafIndices &array, bool host_done) { - intptr_t index; - intptr_t i; + size_t i; - for (i = s->array_index + 1; i < array.length(); i++) { - index = array[i]; + for (i = s->array_index + 1; i < array.size(); ++i) { + auto &leaf{leaf_array[array[i]]}; - switch (leaf_array[index].type) { - case HOST_PARTIAL: - if (hostcmp(s->hostname, leaf_array[index].match) == 0) { - *opaque_ptr = leaf_array[index].opaque_data; + switch (leaf.type) { + case HostLeaf::HOST_PARTIAL: + if (hostcmp(s->hostname, leaf.match) == 0) { + *opaque_ptr = leaf.opaque_data; s->array_index = i; return true; } break; - case HOST_COMPLETE: + case HostLeaf::HOST_COMPLETE: // We have to have consumed the whole hostname for this to match // so that we do not match a rule for "example.com" to // "www.example.com // if (host_done == true) { - *opaque_ptr = leaf_array[index].opaque_data; + *opaque_ptr = leaf.opaque_data; s->array_index = i; return true; } break; - case DOMAIN_PARTIAL: - if (domaincmp(s->hostname, leaf_array[index].match) == false) { + case HostLeaf::DOMAIN_PARTIAL: + if (domaincmp(s->hostname, leaf.match) == false) { break; } // FALL THROUGH - case DOMAIN_COMPLETE: - *opaque_ptr = leaf_array[index].opaque_data; + case HostLeaf::DOMAIN_COMPLETE: + *opaque_ptr = leaf.opaque_data; s->array_index = i; return true; - case LEAF_INVALID: + case HostLeaf::LEAF_INVALID: // Should not get here ink_assert(0); break; @@ -1128,32 +861,13 @@ HostLookup::MatchArray(HostLookupState *s, void **opaque_ptr, DynArray &arr // // bool -HostLookup::MatchFirst(const char *host, HostLookupState *s, void **opaque_ptr) +HostLookup::MatchFirst(string_view host, HostLookupState *s, void **opaque_ptr) { - 
char *last_dot = nullptr; - - s->cur = root; - s->table_level = 0; - s->array_index = -1; - s->hostname = host ? host : ""; - s->host_copy = ats_strdup(s->hostname); - LowerCaseStr(s->host_copy); - - // Find the top level domain in the host copy - s->host_copy_next = s->host_copy; - while (*s->host_copy_next != '\0') { - if (*s->host_copy_next == '.') { - last_dot = s->host_copy_next; - } - s->host_copy_next++; - } - - if (last_dot == nullptr) { - // Must be an unqualified hostname, no dots - s->host_copy_next = s->host_copy; - } else { - s->host_copy_next = last_dot + 1; - } + s->cur = &root; + s->table_level = 0; + s->array_index = -1; + s->hostname = host; + s->hostname_stub = s->hostname; return MatchNext(s, opaque_ptr); } @@ -1169,60 +883,33 @@ HostLookup::MatchNext(HostLookupState *s, void **opaque_ptr) HostBranch *cur = s->cur; // Check to see if there is any work to be done - if (num_el <= 0) { + if (leaf_array.size() <= 0) { return false; } while (s->table_level <= HOST_TABLE_DEPTH) { - if (MatchArray(s, opaque_ptr, cur->leaf_indexs, (s->host_copy_next == nullptr))) { + if (MatchArray(s, opaque_ptr, cur->leaf_indices, s->hostname_stub.empty())) { return true; } // Check to see if we run out of tokens in the hostname - if (s->host_copy_next == nullptr) { + if (s->hostname_stub.empty()) { break; } // Check to see if there are any lower levels - if (cur->type == HOST_TERMINAL) { + if (cur->type == HostBranch::HOST_TERMINAL) { break; } - cur = FindNextLevel(cur, s->host_copy_next, true); + string_view token{TextView{s->hostname_stub}.suffix('.')}; + s->hostname_stub.remove_suffix(std::min(s->hostname_stub.size(), token.size() + 1)); + cur = FindNextLevel(cur, token, true); if (cur == nullptr) { break; } else { s->cur = cur; s->array_index = -1; - s->table_level++; - - // Find the next part of the hostname to process - if (s->host_copy_next <= s->host_copy) { - // Nothing left - s->host_copy_next = nullptr; - } else { - // Back up to period ahead of us and axe 
it - s->host_copy_next--; - ink_assert(*s->host_copy_next == '.'); - *s->host_copy_next = '\0'; - - s->host_copy_next--; - - while (true) { - if (s->host_copy_next <= s->host_copy) { - s->host_copy_next = s->host_copy; - break; - } - // Check for our stop. If we hit a period, we want - // the our portion of the hostname starts one character - // after it - if (*s->host_copy_next == '.') { - s->host_copy_next++; - break; - } - - s->host_copy_next--; - } - } + ++(s->table_level); } } @@ -1236,14 +923,7 @@ HostLookup::MatchNext(HostLookupState *s, void **opaque_ptr) void HostLookup::AllocateSpace(int num_entries) { - // Should not have been allocated before - ink_assert(array_len == -1); - - leaf_array = new HostLeaf[num_entries]; - memset(leaf_array, 0, sizeof(HostLeaf) * num_entries); - - array_len = num_entries; - num_el = 0; + leaf_array.reserve(num_entries); } // void HostLookup::NewEntry(const char* match_data, bool domain_record, void* opaque_data_in) @@ -1251,26 +931,8 @@ HostLookup::AllocateSpace(int num_entries) // Insert a new element in to the table // void -HostLookup::NewEntry(const char *match_data, bool domain_record, void *opaque_data_in) +HostLookup::NewEntry(string_view match_data, bool domain_record, void *opaque_data_in) { - // Make sure space has been allocated - ink_assert(num_el >= 0); - ink_assert(array_len >= 0); - - // Make sure we do not overrun the array; - ink_assert(num_el < array_len); - - leaf_array[num_el].match = ats_strdup(match_data); - leaf_array[num_el].opaque_data = opaque_data_in; - - if ('!' 
!= *(leaf_array[num_el].match)) { - leaf_array[num_el].len = strlen(match_data); - leaf_array[num_el].isNot = false; - } else { - leaf_array[num_el].len = strlen(match_data) - 1; - leaf_array[num_el].isNot = true; - } - - TableInsert(match_data, num_el, domain_record); - num_el++; + leaf_array.emplace_back(match_data, opaque_data_in); + TableInsert(match_data, leaf_array.size() - 1, domain_record); } diff --git a/lib/ts/HostLookup.h b/lib/ts/HostLookup.h index 4b3e3710080..decf48ba778 100644 --- a/lib/ts/HostLookup.h +++ b/lib/ts/HostLookup.h @@ -23,109 +23,168 @@ /***************************************************************************** * - * HostLookup.h - Interface to genernal purpose matcher + * HostLookup.h - Interface to general purpose matcher * * ****************************************************************************/ #pragma once -#include "DynArray.h" +#include +#include +#include +#include + +#include // HostLookup constantss -const int HOST_TABLE_DEPTH = 3; // Controls the max number of levels in the logical tree -const int HOST_ARRAY_MAX = 8; // Sets the fixed array size +constexpr int HOST_TABLE_DEPTH = 3; // Controls the max number of levels in the logical tree +constexpr int HOST_ARRAY_MAX = 8; // Sets the fixed array size -// -// Begin Host Lookup Helper types -// -enum HostNodeType { - HOST_TERMINAL, - HOST_HASH, - HOST_INDEX, - HOST_ARRAY, -}; -enum LeafType { - LEAF_INVALID, - HOST_PARTIAL, - HOST_COMPLETE, - DOMAIN_COMPLETE, - DOMAIN_PARTIAL, -}; +class CharIndex; +class HostArray; -// The data in the HostMatcher tree is pointers to HostBranches. -// No duplicates keys permitted in the tree. To handle multiple -// data items bound the same key, the HostBranch has the lead_indexs -// array which stores pointers (in the form of array indexes) to -// HostLeaf structs +// The data in the HostMatcher tree is pointers to HostBranches. No duplicates keys permitted in the tree. 
To handle +// multiple data items bound the same key, the HostBranch has the lead_indexs array which stores pointers (in the form +// of array indexes) to HostLeaf structs // -// There is HostLeaf struct for each data item put into the -// table +// There is HostLeaf struct for each data item put into the table // struct HostLeaf { - LeafType type; - char *match; // Contains a copy of the match data - int len; // length of the data - bool isNot; // used by any fasssst path ... - void *opaque_data; // Data associated with this leaf + /// Type of leaf. + enum Type { + LEAF_INVALID, + HOST_PARTIAL, + HOST_COMPLETE, + DOMAIN_COMPLETE, + DOMAIN_PARTIAL, + }; + Type type{LEAF_INVALID}; ///< Type of this leaf instance. + std::string match; // Contains a copy of the match data + bool isNot{false}; // used by any fasssst path ... + void *opaque_data{nullptr}; // Data associated with this leaf + + HostLeaf() {} + HostLeaf(std::string_view name, void *data) : opaque_data(data) + { + if (!name.empty() && name.front() == '!') { + name.remove_prefix(1); + isNot = true; + } else { + isNot = false; + } + match.assign(name); + } }; struct HostBranch { - HostBranch(); + /// Branch type. + enum Type { + HOST_TERMINAL, + HOST_HASH, + HOST_INDEX, + HOST_ARRAY, + }; + + /// IntrusiveHashMap support. + struct Linkage { + static constexpr std::hash hasher{}; + static std::string_view + key_of(HostBranch *elt) + { + return elt->key; + } + static auto + hash_of(std::string_view key) + { + return hasher(key); + } + static bool + equal(std::string_view const &lhs, std::string_view const &rhs) + { + return lhs == rhs; + } + static HostBranch *& + next_ptr(HostBranch *elt) + { + return elt->link.next; + } + static HostBranch *& + prev_ptr(HostBranch *elt) + { + return elt->link.prev; + } + + HostBranch *next{nullptr}; + HostBranch *prev{nullptr}; + } link; + + using HostTable = ts::IntrusiveHashMap; + + using LeafIndices = std::vector; + + /// Type of data in this branch. 
+ union Level { + std::nullptr_t _nil; ///< HOST_TERMINAL + HostTable *_table; ///< HOST_HASH + CharIndex *_index; ///< HOST_INDEX + HostArray *_array; ///< HOST_ARRAY + void *_ptr; ///< As generic pointer. + }; + ~HostBranch(); - int level; // what level in the tree. the root is level 0 - HostNodeType type; // tells what kind of data structure is next_level is - void *next_level; // opaque pointer to lookup structure - DynArray leaf_indexs; // pointers HostLeaf(s) + int level_idx{0}; // what level in the tree. the root is level 0 + Type type{HOST_TERMINAL}; // tells what kind of data structure is next_level is + Level next_level{nullptr}; // opaque pointer to lookup structure + LeafIndices leaf_indices; // HostLeaf indices. + std::string key; }; -typedef void (*HostLookupPrintFunc)(void *opaque_data); // // End Host Lookup Helper types // struct HostLookupState { - HostLookupState() : cur(nullptr), table_level(0), array_index(0), hostname(nullptr), host_copy(nullptr), host_copy_next(nullptr) - { - } - ~HostLookupState() { ats_free(host_copy); } - HostBranch *cur; - int table_level; - int array_index; - const char *hostname; - char *host_copy; // request lower-cased host name copy - char *host_copy_next; // ptr to part of host_copy for next use + HostBranch *cur{nullptr}; + int table_level{0}; + int array_index{0}; + std::string_view hostname; ///< Original host name. + std::string_view hostname_stub; ///< Remaining host name to search. 
}; class HostLookup { public: - HostLookup(const char *name); - ~HostLookup(); - void NewEntry(const char *match_data, bool domain_record, void *opaque_data_in); + using LeafArray = std::vector; + using PrintFunc = std::function; + + HostLookup(std::string_view name); + void NewEntry(std::string_view match_data, bool domain_record, void *opaque_data_in); void AllocateSpace(int num_entries); - bool Match(const char *host); - bool Match(const char *host, void **opaque_ptr); - bool MatchFirst(const char *host, HostLookupState *s, void **opaque_ptr); + bool Match(std::string_view host); + bool Match(std::string_view host, void **opaque_ptr); + bool MatchFirst(std::string_view host, HostLookupState *s, void **opaque_ptr); bool MatchNext(HostLookupState *s, void **opaque_ptr); - void Print(HostLookupPrintFunc f); + void Print(PrintFunc const &f); void Print(); - HostLeaf * - getLArray() + + LeafArray * + get_leaf_array() { - return leaf_array; - }; + return &leaf_array; + } private: - void TableInsert(const char *match_data, int index, bool domain_record); - HostBranch *TableNewLevel(HostBranch *from, const char *level_data); - HostBranch *InsertBranch(HostBranch *insert_in, const char *level_data); - HostBranch *FindNextLevel(HostBranch *from, const char *level_data, bool bNotProcess = false); - bool MatchArray(HostLookupState *s, void **opaque_ptr, DynArray &array, bool host_done); - void PrintHostBranch(HostBranch *hb, HostLookupPrintFunc f); - HostBranch *root; // The top of the search tree - HostLeaf *leaf_array; // array of all leaves in tree - int array_len; // the length of the arrays - int num_el; // the numbe of itmems in the tree - const char *matcher_name; // Used for Debug/Warning/Error messages + using HostTable = HostBranch::HostTable; + using LeafIndices = HostBranch::LeafIndices; + + void TableInsert(std::string_view match_data, int index, bool domain_record); + HostBranch *TableNewLevel(HostBranch *from, std::string_view level_data); + HostBranch 
*InsertBranch(HostBranch *insert_in, std::string_view level_data); + HostBranch *FindNextLevel(HostBranch *from, std::string_view level_data, bool bNotProcess = false); + bool MatchArray(HostLookupState *s, void **opaque_ptr, LeafIndices &array, bool host_done); + void PrintHostBranch(HostBranch *hb, PrintFunc const &f); + HostBranch root; // The top of the search tree + LeafArray leaf_array; // array of all leaves in tree + std::string matcher_name; // Used for Debug/Warning/Error messages }; diff --git a/lib/ts/IntrusiveHashMap.h b/lib/ts/IntrusiveHashMap.h new file mode 100644 index 00000000000..b39514c4934 --- /dev/null +++ b/lib/ts/IntrusiveHashMap.h @@ -0,0 +1,662 @@ +/** @file + + Instrusive hash map. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#pragma once + +#include +#include +#include +#include + +namespace ts +{ +/** Intrusive Hash Table. + + Values stored in this container are not destroyed when the container is destroyed or removed from the container. + They must be released by the client. + + Duplicate keys are allowed. Clients must walk the list for multiple entries. + @see @c Location::operator++() + + By default the table automatically expands to limit the average chain length. 
This can be tuned. If set + to @c MANUAL then the table will expand @b only when explicitly requested to do so by the client. + @see @c ExpansionPolicy + @see @c setExpansionPolicy() + @see @c setExpansionLimit() + @see @c expand() + + The hash table is configured by a descriptor class. This must contain the following members + + - The static method key_type key_of(value_type *) which returns the key for an instance of @c value_type. + + - The static method bool equal(key_type lhs, key_type rhs) which checks if two instances of @c Key are the same. + + - The static method hash_id hash_of(key_type) which computes the hash value of the key. @c ID must a numeric type. + + - The static method value_type *& next_ptr(value_type *) which returns a reference to a forward pointer. + + - The static method value_type *& prev_ptr(value_type *) which returns a reference to a backwards pointer. + + These are the required members, it is permitted to have other methods (if the descriptor is used for other purposes) + or to provide overloads of the methods. Note this is compatible with @c IntrusiveDList. + + Several internal types are deduced from these arguments. + + @a Key is the return type of @a key_of and represents the key that distinguishes instances of @a value_type. Two + instances of @c value_type are considered the same if their respective @c Key values are @c equal. @c Key is + presumed cheap to copy. If the underlying key is not a simple type then @a Key should be a constant pointer or a + constant reference. The hash table will never attempt to modify a key. + + @a ID The numeric type that is the hash value for an instance of @a Key. + + Example for @c HttpServerSession keyed by the origin server IP address. 
+ + @code + struct Descriptor { + static sockaddr const* key_of(HttpServerSession const* value) { return &value->ip.sa } + static bool equal(sockaddr const* lhs, sockaddr const* rhs) { return ats_ip_eq(lhs, rhs); } + static uint32_t hash_of(sockaddr const* key) { return ats_ip_hash(key); } + static HttpServerSession *& next_ptr(HttpServerSession * ssn) { return ssn->_next; } + static HttpServerSession *& prev_ptr(HttpServerSession * ssn) { return ssn->_prev; } + }; + using Table = IntrusiveHashMap; + @endcode + + */ +template class IntrusiveHashMap +{ + using self_type = IntrusiveHashMap; + +public: + /// Type of elements in the map. + using value_type = typename std::remove_pointer::type>::type; + /// Key type for the elements. + using key_type = decltype(H::key_of(static_cast(nullptr))); + /// The numeric hash ID computed from a key. + using hash_id = decltype(H::hash_of(H::key_of(static_cast(nullptr)))); + + /// When the hash table is expanded. + enum ExpansionPolicy { + MANUAL, ///< Client must explicitly expand the table. + AVERAGE, ///< Table expands if average chain length exceeds limit. [default] + MAXIMUM ///< Table expands if any chain length exceeds limit. + }; + +protected: + /** List of elements. + * All table elements are in this list. The buckets reference their starting element in the list, or nothing if + * no elements are in that bucket. + */ + using List = IntrusiveDList; + + /// A bucket for the hash map. + struct Bucket { + /// Support for IntrusiveDList, definitions and link storage. + struct Linkage { + static Bucket *&next_ptr(Bucket *b); ///< Access next pointer. + static Bucket *&prev_ptr(Bucket *b); ///< Access prev pointer. + Bucket *_prev{nullptr}; ///< Prev pointer. + Bucket *_next{nullptr}; ///< Next pointer. + } _link; + + value_type *_v{nullptr}; ///< First element in the bucket. + size_t _count{0}; ///< Number of elements in the bucket. + + /** Marker for the chain having different keys. 
+ + This is used to determine if expanding the hash map would be useful - buckets that are not mixed + will not be changed by an expansion. + */ + bool _mixed_p{false}; + + /// Compute the limit value for iteration in this bucket. + /// This is the value of the next bucket, or @c nullptr if no next bucket. + value_type *limit() const; + + /// Verify @a v is in this bucket. + bool contains(value_type *v) const; + + void clear(); ///< Reset to initial state. + }; + +public: + /// The default starting number of buckets. + static size_t constexpr DEFAULT_BUCKET_COUNT = 7; ///< POOMA. + /// The default expansion policy limit. + static size_t constexpr DEFAULT_EXPANSION_LIMIT = 4; ///< Value from previous version. + /// Expansion policy if not specified in constructor. + static ExpansionPolicy constexpr DEFAULT_EXPANSION_POLICY = AVERAGE; + + using iterator = typename List::iterator; + using const_iterator = typename List::const_iterator; + + /// A range of elements in the map. + /// It is a half open range, [first, last) in the usual STL style. + /// @internal I tried @c std::pair as a base for this but was unable to get STL container operations to work. + struct range { + iterator first; ///< First element. + iterator last; ///< Past last element. + + /// Construct from two iterators. + range(iterator const &lhs, iterator const &rhs); + + // These methods enable treating the range as a view in to the hash map. + + /// Return @a first + iterator const &begin() const; + /// Return @a last + iterator const &end() const; + }; + + /// A range of constant elements in the map. + struct const_range { + const_iterator first; ///< First element. + const_iterator last; ///< Past last element. + + /// Construct from two iterators. + const_range(const_iterator const &lhs, const_iterator const &rhs); + + // These methods enable treating the range as a view in to the hash map. 
+ + /// Return @a first + const_iterator const &begin() const; + /// Return @a last + const_iterator const &end() const; + }; + + /// Construct, starting with @a n buckets. + /// This doubles as the default constructor. + IntrusiveHashMap(size_t n = DEFAULT_BUCKET_COUNT); + + /** Remove all values from the table. + + The values are not cleaned up. The values are not touched in this method, therefore it is safe + to destroy them first and then @c clear this table. + */ + self_type &clear(); + + iterator begin(); ///< First element. + const_iterator begin() const; ///< First element. + iterator end(); ///< Past last element. + const_iterator end() const; ///< Past last element. + + /** Insert a value in to the table. + The value @a v must @b NOT already be in a table of this type. + @note The value itself is put in the table, @b not a copy. + */ + void insert(value_type *v); + + /** Find an element with a key equal to @a key. + + @return An iterator for an element with a matching key, or the end iterator if not found. + */ + const_iterator find(key_type key) const; + iterator find(key_type key); + + /** Get an iterator for an existing value @a v. + + @return An iterator that references @a v, or the end iterator if @a v is not in the table. + */ + const_iterator find(value_type const *v) const; + iterator find(value_type *v); + + /** Find the range of objects with keys equal to @a key. + + @return An iterator pair of [first, last) items with equal keys. + */ + const_range equal_range(key_type key) const; + range equal_range(key_type key); + + /** Get an @c iterator for the value @a v. + + This is a bit obscure but needed in certain cases. It should only be used on a value @a v that + is already known to be in the table. + */ + iterator iterator_for(value_type *v); + + /** Remove the value at @a loc from the table. + + @note This does @b not clean up the removed elements. Use carefully to avoid leaks. + + @return @c true if the value was removed, @c false otherwise.
+ */ + bool erase(iterator const &loc); + + /// Remove all elements in the @c range @a r. + bool erase(range const &r); + /// Remove all elements in the half open range [first, last) + bool erase(iterator const &first, iterator const &last); + + /** Apply @a F(value_type&) to every element in the hash map. + * + * This is similar to a range for loop except the iteration is done in a way where destruction or alteration of + * the element does @b not affect the iterator. Primarily this is useful for @c delete to clean up the elements + * but it can have other uses. + * + * @tparam F A functional object of the form void F(value_type&) + * @param f The function to apply. + * @return A reference to @a this map. + */ + template self_type &apply(F &&f); + + /** Expand the hash table to the next larger bucket count. + + This is unconditional, the expansion policy is not checked. Useful primarily when the expansion policy is set to @c MANUAL. + */ + void expand(); + + /// Number of elements in the map. + size_t count() const; + + /// Number of buckets in the array. + size_t bucket_count() const; + + /// Set the expansion policy to @a policy. + self_type &set_expansion_policy(ExpansionPolicy policy); + + /// Get the current expansion policy. + ExpansionPolicy get_expansion_policy() const; + + /// Set the limit value for the expansion policy. + self_type &set_expansion_limit(size_t n); + + /// Get the limit value for the expansion policy. + size_t get_expansion_limit() const; + +protected: + /// The type of storage for the buckets. + using Table = std::vector; + + List _list; ///< Elements in the table. + Table _table; ///< Array of buckets. + + /// List of non-empty buckets. + IntrusiveDList _active_buckets; + + Bucket *bucket_for(key_type key); + + ExpansionPolicy _expansion_policy{DEFAULT_EXPANSION_POLICY}; ///< When to expand the table. + size_t _expansion_limit{DEFAULT_EXPANSION_LIMIT}; ///< Limit value for expansion. + + // noncopyable + IntrusiveHashMap(const IntrusiveHashMap &) = delete; + IntrusiveHashMap &operator=(const IntrusiveHashMap &) = delete; + + // Hash table size prime list.
+ static constexpr std::array PRIME = {{1, 3, 7, 13, 31, 61, 127, 251, + 509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, + 131071, 262139, 524287, 1048573, 2097143, 4194301, 8388593, 16777213, + 33554393, 67108859, 134217689, 268435399, 536870909}}; +}; + +template +auto +IntrusiveHashMap::Bucket::Linkage::next_ptr(Bucket *b) -> Bucket *& +{ + return b->_link._next; +} + +template +auto +IntrusiveHashMap::Bucket::Linkage::prev_ptr(Bucket *b) -> Bucket *& +{ + return b->_link._prev; +} + +// This is designed so that if the bucket is empty, then @c nullptr is returned, which will immediately terminate +// a search loop on an empty bucket because that will start with a nullptr candidate, matching the limit. +template +auto +IntrusiveHashMap::Bucket::limit() const -> value_type * +{ + Bucket *n{_link._next}; + return n ? n->_v : nullptr; +}; + +template +void +IntrusiveHashMap::Bucket::clear() +{ + _v = nullptr; + _count = 0; + _mixed_p = false; +} + +template +bool +IntrusiveHashMap::Bucket::contains(value_type *v) const +{ + value_type *x = _v; + value_type *limit = this->limit(); + while (x != limit && x != v) { + x = H::next_ptr(x); + } + return x == v; +} + +// --------------------- +template IntrusiveHashMap::range::range(iterator const &lhs, iterator const &rhs) : first(lhs), last(rhs) {} + +template +auto +IntrusiveHashMap::range::begin() const -> iterator const & +{ + return first; +} +template +auto +IntrusiveHashMap::range::end() const -> iterator const & +{ + return last; +} + +template +IntrusiveHashMap::const_range::const_range(const_iterator const &lhs, const_iterator const &rhs) : first(lhs), last(rhs) +{ +} + +template +auto +IntrusiveHashMap::const_range::begin() const -> const_iterator const & +{ + return first; +} + +template +auto +IntrusiveHashMap::const_range::end() const -> const_iterator const & +{ + return last; +} + +// --------------------- + +template IntrusiveHashMap::IntrusiveHashMap(size_t n) +{ + if (n) { + 
_table.resize(*std::lower_bound(PRIME.begin(), PRIME.end(), n)); + } +} + +template +auto +IntrusiveHashMap::bucket_for(key_type key) -> Bucket * +{ + return &_table[H::hash_of(key) % _table.size()]; +} + +template +auto +IntrusiveHashMap::begin() -> iterator +{ + return _list.begin(); +} + +template +auto +IntrusiveHashMap::begin() const -> const_iterator +{ + return _list.begin(); +} + +template +auto +IntrusiveHashMap::end() -> iterator +{ + return _list.end(); +} + +template +auto +IntrusiveHashMap::end() const -> const_iterator +{ + return _list.end(); +} + +template +auto +IntrusiveHashMap::clear() -> self_type & +{ + for (auto &b : _table) { + b.clear(); + } + // Clear container data. + _list.clear(); + _active_buckets.clear(); + return *this; +} + +template +auto +IntrusiveHashMap::find(key_type key) -> iterator +{ + Bucket *b = this->bucket_for(key); + value_type *v = b->_v; + value_type *limit = b->limit(); + while (v != limit && !H::equal(key, H::key_of(v))) { + v = H::next_ptr(v); + } + return _list.iterator_for(v); +} + +template +auto +IntrusiveHashMap::find(key_type key) const -> const_iterator +{ + return const_cast(this)->find(key); +} + +template +auto +IntrusiveHashMap::equal_range(key_type key) -> range +{ + iterator first{this->find(key)}; + iterator last{first}; + iterator limit{this->end()}; + + while (last != limit && H::equal(key, H::key_of(&*last))) { + ++last; + } + + return {first, last}; +} + +template +auto +IntrusiveHashMap::equal_range(key_type key) const -> const_range +{ + return const_cast(this)->equal_range(key); +} + +template +auto +IntrusiveHashMap::iterator_for(value_type *v) -> iterator +{ + return _list.iterator_for(v); +} + +template +auto +IntrusiveHashMap::find(value_type *v) -> iterator +{ + Bucket *b = this->bucket_for(H::key_of(v)); + return b->contains(v) ? 
_list.iterator_for(v) : this->end(); +} + +template +auto +IntrusiveHashMap::find(value_type const *v) const -> const_iterator +{ + return const_cast(this)->find(const_cast(v)); +} + +template +void +IntrusiveHashMap::insert(value_type *v) +{ + auto key = H::key_of(v); + Bucket *bucket = this->bucket_for(key); + value_type *spot = bucket->_v; + + if (nullptr == spot) { // currently empty bucket, set it and add to active list. + _list.append(v); + bucket->_v = v; + _active_buckets.append(bucket); + } else { + value_type *limit = bucket->limit(); + + while (spot != limit && !H::equal(key, H::key_of(spot))) { + spot = H::next_ptr(spot); + } + + if (spot == limit) { // this key is not in the bucket, add it at the front of the bucket (it becomes the new bucket start) and note this is now a mixed bucket. + _list.insert_before(bucket->_v, v); + bucket->_v = v; + bucket->_mixed_p = true; + } else { // insert before the first matching key. + _list.insert_before(spot, v); + if (spot == bucket->_v) { // added before the bucket start, update the start. + bucket->_v = v; + } else { // if the matching key wasn't first, there is some other key in the bucket, mark it mixed. + bucket->_mixed_p = true; + } + } + } + ++bucket->_count; + + // auto expand if appropriate.
+ if ((AVERAGE == _expansion_policy && (_list.count() / _table.size()) > _expansion_limit) || + (MAXIMUM == _expansion_policy && bucket->_count > _expansion_limit && bucket->_mixed_p)) { + this->expand(); + } +} + +template +bool +IntrusiveHashMap::erase(iterator const &loc) +{ + bool zret = false; + + if (loc != this->end()) { + value_type *v = &*loc; + Bucket *b = this->bucket_for(H::key_of(v)); + if (b->contains(v)) { + value_type *nv = H::next_ptr(v); + value_type *limit = b->limit(); + if (b->_v == v && limit == nv) { // @a v is the only element in the bucket, deactivate the bucket (this also zeroes the count). + _active_buckets.erase(b); + b->clear(); + } else { + if (b->_v == v) { // removing the first element in the bucket, advance the bucket start. + b->_v = nv; + } + --b->_count; // every removal shrinks the bucket, not only removal of its first element. + } + zret = true; + _list.erase(v); + } + } + return zret; +} + +template +template +auto +IntrusiveHashMap::apply(F &&f) -> self_type & +{ + iterator spot{this->begin()}; + iterator limit{this->end()}; + while (spot != limit) { + f(*spot++); // post increment means @a spot is updated before @a f is applied. + } + return *this; +} + +template +void +IntrusiveHashMap::expand() +{ + ExpansionPolicy org_expansion_policy = _expansion_policy; // save for restore. + value_type *old = _list.head(); // save for repopulating. + auto old_size = _table.size(); + + // Reset to empty state. + this->clear(); + _table.resize(*std::lower_bound(PRIME.begin(), PRIME.end(), old_size + 1)); + + _expansion_policy = MANUAL; // disable any auto expand while we're expanding. + while (old) { + value_type *v = old; + old = H::next_ptr(old); + this->insert(v); + } + // all elements have been re-inserted in to the resized table, nothing else needs to be cleaned up. + _expansion_policy = org_expansion_policy; // reset to original value.
+} + +template +size_t +IntrusiveHashMap::count() const +{ + return _list.count(); +} + +template +size_t +IntrusiveHashMap::bucket_count() const +{ + return _table.size(); +} + +template +auto +IntrusiveHashMap::set_expansion_policy(ExpansionPolicy policy) -> self_type & +{ + _expansion_policy = policy; + return *this; +} + +template +auto +IntrusiveHashMap::get_expansion_policy() const -> ExpansionPolicy +{ + return _expansion_policy; +} + +template +auto +IntrusiveHashMap::set_expansion_limit(size_t n) -> self_type & +{ + _expansion_limit = n; + return *this; +} + +template +size_t +IntrusiveHashMap::get_expansion_limit() const +{ + return _expansion_limit; +} +/* ---------------------------------------------------------------------------------------------- */ + +} // namespace ts diff --git a/lib/ts/Makefile.am b/lib/ts/Makefile.am index 3f30afdff98..660d92a1774 100644 --- a/lib/ts/Makefile.am +++ b/lib/ts/Makefile.am @@ -271,6 +271,7 @@ test_tslib_SOURCES = \ unit-tests/test_History.cc \ unit-tests/test_ink_inet.cc \ unit-tests/test_IntrusiveDList.cc \ + unit-tests/test_IntrusiveHashMap.cc \ unit-tests/test_IntrusivePtr.cc \ unit-tests/test_IpMap.cc \ unit-tests/test_layout.cc \ diff --git a/lib/ts/unit-tests/test_IntrusiveHashMap.cc b/lib/ts/unit-tests/test_IntrusiveHashMap.cc new file mode 100644 index 00000000000..ba30e977720 --- /dev/null +++ b/lib/ts/unit-tests/test_IntrusiveHashMap.cc @@ -0,0 +1,148 @@ +/** @file + + IntrusiveHashMap unit tests. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include "../../../tests/include/catch.hpp" + +// ------------- +// --- TESTS --- +// ------------- + +using namespace std::literals; + +namespace +{ +struct Thing { + std::string _payload; + int _n{0}; + + Thing(std::string_view text) : _payload(text) {} + Thing(std::string_view text, int x) : _payload(text), _n(x) {} + + Thing *_next{nullptr}; + Thing *_prev{nullptr}; +}; + +struct ThingMapDescriptor { + static Thing *& + next_ptr(Thing *thing) + { + return thing->_next; + } + static Thing *& + prev_ptr(Thing *thing) + { + return thing->_prev; + } + static std::string_view + key_of(Thing *thing) + { + return thing->_payload; + } + static constexpr std::hash hasher{}; + static auto + hash_of(std::string_view s) -> decltype(hasher(s)) + { + return hasher(s); + } + static bool + equal(std::string_view const &lhs, std::string_view const &rhs) + { + return lhs == rhs; + } +}; + +using Map = ts::IntrusiveHashMap; + +} // namespace + +TEST_CASE("IntrusiveHashMap", "[libts][IntrusiveHashMap]") +{ + Map map; + map.insert(new Thing("bob")); + REQUIRE(map.count() == 1); + map.insert(new Thing("dave")); + map.insert(new Thing("persia")); + REQUIRE(map.count() == 3); + for (auto &thing : map) { + delete &thing; + } + map.clear(); + REQUIRE(map.count() == 0); + + size_t nb = map.bucket_count(); + std::bitset<64> marks; + for (int i = 1; i <= 63; ++i) { + std::string name; + ts::bwprint(name, "{} squared is {}", i, i * i); + Thing *thing = new Thing(name); + thing->_n = i; + map.insert(thing); + 
REQUIRE(map.count() == i); + REQUIRE(map.find(name) != map.end()); + } + REQUIRE(map.count() == 63); + REQUIRE(map.bucket_count() > nb); + for (auto &thing : map) { + REQUIRE(0 == marks[thing._n]); + marks[thing._n] = 1; + } + marks[0] = 1; + REQUIRE(marks.all()); + map.insert(new Thing("dup"sv, 79)); + map.insert(new Thing("dup"sv, 80)); + map.insert(new Thing("dup"sv, 81)); + + auto r = map.equal_range("dup"sv); + REQUIRE(r.first != r.last); + REQUIRE(r.first->_payload == "dup"sv); + + Map::iterator idx; + + // Erase all the non-"dup" and see if the range is still correct. + map.apply([&map](Thing &thing) { + if (thing._payload != "dup"sv) + map.erase(map.iterator_for(&thing)); + }); + r = map.equal_range("dup"sv); + REQUIRE(r.first != r.last); + idx = r.first; + REQUIRE(idx->_payload == "dup"sv); + REQUIRE((++idx)->_payload == "dup"sv); + REQUIRE(idx->_n != r.first->_n); + REQUIRE((++idx)->_payload == "dup"sv); + REQUIRE(idx->_n != r.first->_n); + REQUIRE(++idx == map.end()); + // Verify only the "dup" items are left. + for (auto &&elt : map) { + REQUIRE(elt._payload == "dup"sv); + } + // clean up the last bits. + map.apply([](Thing &thing) { delete &thing; }); +};