-
Notifications
You must be signed in to change notification settings - Fork 237
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Building a std::map for tags is somewhat expensive, especially when we know that the number of tags is usually quite small.

Instead, use a custom structure that does a crappy-but-fast hash to put the keys/values in one of 16 buckets, then linear search the bucket.

For GB, before:

```
real 1m11.507s
user 16m49.604s
sys 0m17.381s
```

After:

```
real 1m9.557s
user 16m28.826s
sys 0m17.937s
```

Saving 2 seconds of wall clock and 20 seconds of user time doesn't seem like much, but (a) it's not nothing and (b) having the tags in this format will enable us to thwart some of Lua's defensive copies in a subsequent commit.

A note about the hash function: hashing each letter of the string using boost::hash_combine eliminated the time savings.
- Loading branch information
Showing
8 changed files
with
180 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#ifndef _TAG_MAP_H
#define _TAG_MAP_H

#include <cstdint>
#include <vector>
#include <string>
#include <boost/container/flat_map.hpp>

// We track tags in a special structure, which enables some tricks when
// doing Lua interop.
//
// The alternative is a std::map - but often, our map is quite small.
// It's preferable to have a small set of vectors and do linear search.
//
// Further, we can avoid passing std::string from Lua -> C++ in some cases
// by first checking to see if the string we would have passed is already
// stored in our tag map, and passing a reference to its location.

// Assumptions:
// 1. Not thread-safe.
// 2. Lifetime of map is less than lifetime of keys/values that are passed.
//    The map stores only pointers to the strings given to addTag(); it never
//    copies them, so those strings must stay alive while the map is in use.
class TagMap {
public:
	// Creates an empty map with a fixed number of hash buckets.
	TagMap();

	// Forgets every stored tag so the instance can be reused.
	void reset();

	// Associates `value` with `key`, replacing any value previously stored
	// for an equal key. Only the addresses of the arguments are retained.
	void addTag(const std::string& key, const std::string& value);

	// Returns a pointer to the value stored for `key`, or nullptr if the
	// key is absent.
	const std::string* getTag(const std::string& key) const;

	// Copies every key/value pair into a freshly built boost flat_map.
	boost::container::flat_map<std::string, std::string> exportToBoostMap() const;

private:
	// Finds `value` in the bucketed string table `vector`, appending a
	// pointer to it if absent. Returns its location packed into 32 bits:
	// bucket index in the high 16 bits, position in the bucket in the low 16.
	uint32_t ensureString(
		std::vector<std::vector<const std::string*>>& vector,
		const std::string& value
	);

	// keys[bucket][pos] points at a key string.
	std::vector<std::vector<const std::string*>> keys;
	// key2value[bucket][pos] is the packed location (within `values`) of the
	// value belonging to keys[bucket][pos].
	std::vector<std::vector<uint32_t>> key2value;
	// values[bucket][pos] points at a value string.
	std::vector<std::vector<const std::string*>> values;
};

#endif // _TAG_MAP_H
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
#include "tag_map.h"
#include <boost/functional/hash.hpp>
#include <cstdint>
#include <cstring>
#include <iostream>
|
||
// Builds an empty map: each of the three tables starts with 16 empty buckets.
TagMap::TagMap()
	: keys(16), key2value(16), values(16) {
}
|
||
void TagMap::reset() { | ||
for (int i = 0; i < 16; i++) { | ||
keys[i].clear(); | ||
key2value[i].clear(); | ||
values[i].clear(); | ||
} | ||
} | ||
|
||
// Cheap bucket-selection hash: the string's length, XORed with its first
// four bytes when it has at least four.
//
// This is a pretty crappy hash function in terms of bit avalanching and
// distribution of output values, but it's very good in terms of speed,
// which turns out to be the important measure.
const std::size_t hashString(const std::string& str) {
	std::size_t hash = str.size();

	if (str.size() >= 4) {
		// Copy the prefix out with memcpy: dereferencing a casted
		// uint32_t* would be an unaligned, aliasing-violating read.
		uint32_t prefix;
		std::memcpy(&prefix, str.data(), sizeof(prefix));
		hash ^= prefix;
	}

	return hash;
}
|
||
uint32_t TagMap::ensureString( | ||
std::vector<std::vector<const std::string*>>& vector, | ||
const std::string& value | ||
) { | ||
std::size_t hash = hashString(value); | ||
|
||
const uint16_t shard = hash % vector.size(); | ||
for (int i = 0; i < vector[shard].size(); i++) | ||
if (*(vector[shard][i]) == value) | ||
return shard << 16 | i; | ||
|
||
vector[shard].push_back(&value); | ||
return shard << 16 | (vector[shard].size() - 1); | ||
} | ||
|
||
|
||
void TagMap::addTag(const std::string& key, const std::string& value) { | ||
uint32_t valueLoc = ensureString(values, value); | ||
// std::cout << "valueLoc = " << valueLoc << std::endl; | ||
uint32_t keyLoc = ensureString(keys, key); | ||
// std::cout << "keyLoc = " << keyLoc << std::endl; | ||
|
||
|
||
const uint16_t shard = keyLoc >> 16; | ||
const uint16_t pos = keyLoc; | ||
// std::cout << "shard=" << shard << ", pos=" << pos << std::endl; | ||
if (key2value[shard].size() <= pos) { | ||
// std::cout << "growing shard" << std::endl; | ||
key2value[shard].resize(pos + 1); | ||
} | ||
|
||
key2value[shard][pos] = valueLoc; | ||
} | ||
|
||
const std::string* TagMap::getTag(const std::string& key) const { | ||
// Returns nullptr if absent, else pointer to value. | ||
std::size_t hash = hashString(key); | ||
|
||
const uint16_t shard = hash % keys.size(); | ||
for (int i = 0; i < keys[shard].size(); i++) | ||
if (*(keys[shard][i]) == key) { | ||
const uint32_t valueLoc = key2value[shard][i]; | ||
return values[valueLoc >> 16][valueLoc & 0xFFFF]; | ||
} | ||
|
||
return nullptr; | ||
} | ||
|
||
// Builds a boost flat_map holding a copy of every key/value pair currently
// stored in this TagMap.
boost::container::flat_map<std::string, std::string> TagMap::exportToBoostMap() const {
	boost::container::flat_map<std::string, std::string> rv;

	for (std::size_t shard = 0; shard < keys.size(); shard++) {
		const std::vector<const std::string*>& bucket = keys[shard];
		for (std::size_t pos = 0; pos < bucket.size(); pos++) {
			// Packed location of this key's value: bucket << 16 | position.
			const uint32_t valueLoc = key2value[shard][pos];
			rv[*bucket[pos]] = *values[valueLoc >> 16][valueLoc & 0xFFFF];
		}
	}

	return rv;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters