Skip to content

Commit

Permalink
Progress on plumbing a string pool for full_keys through
Browse files Browse the repository at this point in the history
  • Loading branch information
e-n-f committed Nov 4, 2024
1 parent b3b89e1 commit ab95915
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 93 deletions.
14 changes: 7 additions & 7 deletions attribute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ void set_attribute_accum(std::unordered_map<std::string, attribute_op> &attribut
}

template <class T>
static void preserve_attribute1(attribute_op const &op, std::string const &key, T const &val, std::vector<std::string> &full_keys, std::vector<T> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state) {
static void preserve_attribute1(attribute_op const &op, std::string const &key, T const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<T> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool) {
for (size_t i = 0; i < full_keys.size(); i++) {
if (key == full_keys[i]) {
if (key == *full_keys[i]) {
switch (op) {
case op_sum:
full_values[i] = (full_values[i].to_double() + val.to_double());
Expand Down Expand Up @@ -193,14 +193,14 @@ static void preserve_attribute1(attribute_op const &op, std::string const &key,
exit(EXIT_IMPOSSIBLE);
}

full_keys.push_back(key);
full_keys.push_back(key_pool.pool(key));
full_values.push_back(v);
}

void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::string> &full_keys, std::vector<mvt_value> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state) {
preserve_attribute1(op, key, val, full_keys, full_values, attribute_accum_state);
void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<mvt_value> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool) {
preserve_attribute1(op, key, val, full_keys, full_values, attribute_accum_state, key_pool);
}

void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::string> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state) {
preserve_attribute1(op, key, val, full_keys, full_values, attribute_accum_state);
void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool) {
preserve_attribute1(op, key, val, full_keys, full_values, attribute_accum_state, key_pool);
}
6 changes: 4 additions & 2 deletions attribute.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <vector>
#include <unordered_map>
#include <map>
#include <memory>
#include "mvt.hpp"
#include "milo/dtoa_milo.h"

Expand All @@ -24,12 +25,13 @@ struct accum_state {
};

struct serial_val;
struct key_pool;

void set_attribute_accum(std::unordered_map<std::string, attribute_op> &attribute_accum, std::string name, std::string type);
void set_attribute_accum(std::unordered_map<std::string, attribute_op> &attribute_accum, const char *arg, char **argv);

void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::string> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state);
void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::string> &full_keys, std::vector<mvt_value> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state);
void preserve_attribute(attribute_op const &op, std::string const &key, serial_val const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<serial_val> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool);
void preserve_attribute(attribute_op const &op, std::string const &key, mvt_value const &val, std::vector<std::shared_ptr<std::string>> &full_keys, std::vector<mvt_value> &full_values, std::unordered_map<std::string, accum_state> &attribute_accum_state, key_pool &key_pool);

extern std::map<std::string, attribute_op> numeric_operations;

Expand Down
49 changes: 26 additions & 23 deletions clip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1163,14 +1163,14 @@ static void add_mean(mvt_feature &feature, mvt_layer &layer, std::string const &
};

// accumulate :sum:, :min:, :max:, and :count: versions of the specified attribute
static void preserve_numeric(const std::string &key, const mvt_value &val, // numeric attribute being accumulated
std::vector<std::string> &full_keys, // keys of feature being accumulated onto
std::vector<mvt_value> &full_values, // values of features being accumulated onto
const std::string &accumulate_numeric, // prefix of accumulations
std::set<std::string> &keys, // key presence in the source feature
std::map<std::string, size_t> &numeric_out_field, // key index in the output feature
std::unordered_map<std::string, accum_state> &attribute_accum_state // accumulation state for preserve_attribute()
) {
static void preserve_numeric(const std::string &key, const mvt_value &val, // numeric attribute being accumulated
std::vector<std::shared_ptr<std::string>> &full_keys, // keys of feature being accumulated onto
std::vector<mvt_value> &full_values, // values of features being accumulated onto
const std::string &accumulate_numeric, // prefix of accumulations
std::set<std::string> &keys, // key presence in the source feature
std::map<std::string, size_t> &numeric_out_field, // key index in the output feature
std::unordered_map<std::string, accum_state> &attribute_accum_state, // accumulation state for preserve_attribute()
key_pool &key_pool) {
// If this is a numeric attribute, but there is also a prefix:sum (etc.) for the
// same attribute, we want to use that one instead of this one.

Expand Down Expand Up @@ -1213,7 +1213,7 @@ static void preserve_numeric(const std::string &key, const mvt_value &val, /
if (out_attr == numeric_out_field.end()) {
// not present at all, so copy our value to the prefixed output
numeric_out_field.emplace(prefixed, full_keys.size());
full_keys.push_back(prefixed);
full_keys.push_back(key_pool.pool(prefixed));

if (op.second == op_count) {
if (starting_from_accumulation) {
Expand All @@ -1229,7 +1229,7 @@ static void preserve_numeric(const std::string &key, const mvt_value &val, /
} else {
// exists unprefixed, so copy it, and then accumulate on our value
numeric_out_field.emplace(prefixed, full_keys.size());
full_keys.push_back(prefixed);
full_keys.push_back(key_pool.pool(prefixed));

if (op.second == op_count) {
mvt_value v;
Expand All @@ -1243,7 +1243,7 @@ static void preserve_numeric(const std::string &key, const mvt_value &val, /
full_values.push_back(v);
} else {
full_values.push_back(full_values[out_attr->second]);
preserve_attribute(op.second, prefixed, val, full_keys, full_values, attribute_accum_state);
preserve_attribute(op.second, prefixed, val, full_keys, full_values, attribute_accum_state, key_pool);
}
}
} else {
Expand All @@ -1256,7 +1256,7 @@ static void preserve_numeric(const std::string &key, const mvt_value &val, /
full_values[prefixed_attr->second] = mvt_value(mvt_value_to_long_long(full_values[prefixed_attr->second]) + 1);
}
} else {
preserve_attribute(op.second, prefixed, val, full_keys, full_values, attribute_accum_state);
preserve_attribute(op.second, prefixed, val, full_keys, full_values, attribute_accum_state, key_pool);
}
}
}
Expand Down Expand Up @@ -1289,7 +1289,8 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
std::set<std::string> const &exclude,
std::vector<std::string> const &exclude_prefix,
std::unordered_map<std::string, attribute_op> const &attribute_accum,
std::string const &accumulate_numeric) {
std::string const &accumulate_numeric,
key_pool &key_pool) {
// Add geometry to output feature

mvt_feature outfeature;
Expand All @@ -1315,7 +1316,7 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
// multiplier cluster accumulated onto them

std::unordered_map<std::string, accum_state> attribute_accum_state;
std::vector<std::string> full_keys;
std::vector<std::shared_ptr<std::string>> full_keys;
std::vector<mvt_value> full_values;
std::map<std::string, size_t> numeric_out_field;

Expand All @@ -1324,12 +1325,12 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
auto f = attribute_accum.find(key);
if (f != attribute_accum.end()) {
// this attribute has an accumulator, so convert it
full_keys.push_back(features[0].layer->keys[features[0].tags[i]]);
full_keys.push_back(key_pool.pool(features[0].layer->keys[features[0].tags[i]]));
full_values.push_back(features[0].layer->values[features[0].tags[i + 1]]);
} else if (accumulate_numeric.size() > 0 && features[0].layer->values[features[0].tags[i + 1]].is_numeric()) {
// convert numeric for accumulation
numeric_out_field.emplace(key, full_keys.size());
full_keys.push_back(key);
full_keys.push_back(key_pool.pool(key));
full_values.push_back(features[0].layer->values[features[0].tags[i + 1]]);
} else {
// otherwise just tag it directly onto the output feature
Expand Down Expand Up @@ -1357,13 +1358,13 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
auto f = attribute_accum.find(key);
if (f != attribute_accum.end()) {
mvt_value val = features[i].layer->values[features[i].tags[j + 1]];
preserve_attribute(f->second, key, val, full_keys, full_values, attribute_accum_state);
preserve_attribute(f->second, key, val, full_keys, full_values, attribute_accum_state, key_pool);
} else if (accumulate_numeric.size() > 0) {
const mvt_value &val = features[i].layer->values[features[i].tags[j + 1]];
if (val.is_numeric()) {
preserve_numeric(key, val, full_keys, full_values,
accumulate_numeric,
keys, numeric_out_field, attribute_accum_state);
keys, numeric_out_field, attribute_accum_state, key_pool);
}
}
}
Expand All @@ -1373,8 +1374,8 @@ static void feature_out(std::vector<tile_feature> const &features, mvt_layer &ou
// and tag them onto the output feature

for (size_t i = 0; i < full_keys.size(); i++) {
if (should_keep(full_keys[i], keep, exclude, exclude_prefix)) {
outlayer.tag(outfeature, full_keys[i], full_values[i]);
if (should_keep(*full_keys[i], keep, exclude, exclude_prefix)) {
outlayer.tag(outfeature, *full_keys[i], full_values[i]);
}
}

Expand Down Expand Up @@ -1522,6 +1523,7 @@ mvt_tile assign_to_bins(mvt_tile &features,
std::set<std::string> exclude,
std::vector<std::string> exclude_prefix) {
std::vector<index_event> events;
key_pool key_pool;

// Index bins
for (size_t i = 0; i < bins.size(); i++) {
Expand Down Expand Up @@ -1678,7 +1680,7 @@ mvt_tile assign_to_bins(mvt_tile &features,
if (outfeatures[i].size() > 1) {
feature_out(outfeatures[i], outlayer,
keep, exclude, exclude_prefix, attribute_accum,
accumulate_numeric);
accumulate_numeric, key_pool);
mvt_feature &nfeature = outlayer.features.back();
mvt_value val;
val.type = mvt_uint;
Expand Down Expand Up @@ -1713,6 +1715,7 @@ std::string overzoom(std::vector<source_tile> const &tiles, int nz, int nx, int
std::vector<mvt_layer> const &bins, std::string const &bin_by_id_list,
std::string const &accumulate_numeric) {
mvt_tile outtile;
key_pool key_pool;

for (auto const &tile : tiles) {
for (auto const &layer : tile.tile.layers) {
Expand Down Expand Up @@ -1837,7 +1840,7 @@ std::string overzoom(std::vector<source_tile> const &tiles, int nz, int nx, int

if (flush_multiplier_cluster) {
if (pending_tile_features.size() > 0) {
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric);
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric, key_pool);
pending_tile_features.clear();
}
}
Expand Down Expand Up @@ -1894,7 +1897,7 @@ std::string overzoom(std::vector<source_tile> const &tiles, int nz, int nx, int
}

if (pending_tile_features.size() > 0) {
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric);
feature_out(pending_tile_features, *outlayer, keep, exclude, exclude_prefix, attribute_accum, accumulate_numeric, key_pool);
pending_tile_features.clear();
}

Expand Down
5 changes: 3 additions & 2 deletions flatgeobuf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,9 @@ void readFeature(const FlatGeobuf::Feature *feature, long long feature_sequence_
sf.geometry = dv;
sf.t = drawvec_type;

std::vector<std::string> full_keys;
std::vector<std::shared_ptr<std::string>> full_keys;
std::vector<serial_val> full_values;
key_pool key_pool;

// assume tabular schema with columns in header
size_t p_pos = 0;
Expand Down Expand Up @@ -243,7 +244,7 @@ void readFeature(const FlatGeobuf::Feature *feature, long long feature_sequence_
fprintf(stderr, "flatgeobuf has unsupported column type %u\n", (unsigned int)col_type);
exit(EXIT_IMPOSSIBLE);
}
full_keys.push_back(h_column_names[col_idx]);
full_keys.push_back(key_pool.pool(h_column_names[col_idx]));
full_values.push_back(sv);
}

Expand Down
22 changes: 11 additions & 11 deletions serial.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ static void add_scaled_node(struct reader *r, serialization_state *sst, draw g)
}

// called from frontends
int serialize_feature(struct serialization_state *sst, serial_feature &sf, std::string const &layername) {
int serialize_feature(struct serialization_state *sst, serial_feature &sf, std::string const &layername, key_pool &key_pool) {
struct reader *r = &(*sst->readers)[sst->segment];

sf.bbox[0] = LLONG_MAX;
Expand Down Expand Up @@ -714,7 +714,7 @@ int serialize_feature(struct serialization_state *sst, serial_feature &sf, std::

bbox_index = encode_index(midx, midy);
if (additional[A_CALCULATE_INDEX]) {
sf.full_keys.push_back("tippecanoe:index");
sf.full_keys.push_back(key_pool.pool("tippecanoe:index"));

serial_val sv;
sv.type = mvt_double;
Expand Down Expand Up @@ -776,21 +776,21 @@ int serialize_feature(struct serialization_state *sst, serial_feature &sf, std::
for (auto &kv : set_attributes) {
bool found = false;
for (size_t i = 0; i < sf.full_keys.size(); i++) {
if (sf.full_keys[i] == kv.first) {
if (*sf.full_keys[i] == kv.first) {
sf.full_values[i] = kv.second;
found = true;
break;
}
}

if (!found) {
sf.full_keys.push_back(kv.first);
sf.full_keys.push_back(key_pool.pool(kv.first));
sf.full_values.push_back(kv.second);
}
}

for (ssize_t i = (ssize_t) sf.full_keys.size() - 1; i >= 0; i--) {
coerce_value(sf.full_keys[i], sf.full_values[i].type, sf.full_values[i].s, sst->attribute_types);
coerce_value(*sf.full_keys[i], sf.full_values[i].type, sf.full_values[i].s, sst->attribute_types);

if (prevent[P_SINGLE_PRECISION]) {
if (sf.full_values[i].type == mvt_double) {
Expand All @@ -801,12 +801,12 @@ int serialize_feature(struct serialization_state *sst, serial_feature &sf, std::
}
}

if (sf.full_keys[i] == attribute_for_id) {
if (*sf.full_keys[i] == attribute_for_id) {
if (sf.full_values[i].type != mvt_double && !additional[A_CONVERT_NUMERIC_IDS]) {
static bool warned = false;

if (!warned) {
fprintf(stderr, "Warning: Attribute \"%s\"=\"%s\" as feature ID is not a number\n", sf.full_keys[i].c_str(), sf.full_values[i].s.c_str());
fprintf(stderr, "Warning: Attribute \"%s\"=\"%s\" as feature ID is not a number\n", sf.full_keys[i]->c_str(), sf.full_values[i].s.c_str());
warned = true;
}
} else {
Expand Down Expand Up @@ -839,12 +839,12 @@ int serialize_feature(struct serialization_state *sst, serial_feature &sf, std::
}

if (sst->exclude_all) {
if (sst->include->count(sf.full_keys[i]) == 0) {
if (sst->include->count(*sf.full_keys[i]) == 0) {
sf.full_keys.erase(sf.full_keys.begin() + i);
sf.full_values.erase(sf.full_values.begin() + i);
continue;
}
} else if (sst->exclude->count(sf.full_keys[i]) != 0) {
} else if (sst->exclude->count(*sf.full_keys[i]) != 0) {
sf.full_keys.erase(sf.full_keys.begin() + i);
sf.full_values.erase(sf.full_values.begin() + i);
continue;
Expand All @@ -854,7 +854,7 @@ int serialize_feature(struct serialization_state *sst, serial_feature &sf, std::
if (!sst->filters) {
for (size_t i = 0; i < sf.full_keys.size(); i++) {
auto ts = sst->layermap->find(layername);
add_to_tilestats(ts->second.tilestats, sf.full_keys[i], sf.full_values[i]);
add_to_tilestats(ts->second.tilestats, *sf.full_keys[i], sf.full_values[i]);
}
}

Expand All @@ -867,7 +867,7 @@ int serialize_feature(struct serialization_state *sst, serial_feature &sf, std::
}

for (size_t i = 0; i < sf.full_keys.size(); i++) {
sf.keys.push_back(addpool(r->poolfile, r->treefile, sf.full_keys[i].c_str(), mvt_string, r->key_dedup));
sf.keys.push_back(addpool(r->poolfile, r->treefile, sf.full_keys[i]->c_str(), mvt_string, r->key_dedup));
sf.values.push_back(addpool(r->poolfile, r->treefile, sf.full_values[i].s.c_str(), sf.full_values[i].type, r->value_dedup));
}

Expand Down
19 changes: 18 additions & 1 deletion serial.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <string.h>
#include <vector>
#include <atomic>
#include <memory>
#include <sys/stat.h>
#include "geometry.hpp"
#include "mbtiles.hpp"
Expand Down Expand Up @@ -71,6 +72,22 @@ struct serial_val {
}
};

// Interning pool for key strings: every distinct key text is stored once,
// and each request for that text receives a shared_ptr to the same instance.
struct key_pool {
	// Key text -> the shared string instance handed out for that text.
	std::unordered_map<std::string, std::shared_ptr<std::string>> mapping;

	// Returns the shared instance whose contents equal `s`. The first
	// request for a given text allocates the instance and records it in
	// `mapping`; later requests return the recorded one.
	std::shared_ptr<std::string> pool(std::string const &s) {
		auto entry = mapping.find(s);
		if (entry == mapping.end()) {
			// Not seen before: build the pooled copy and remember it.
			entry = mapping.emplace(s, std::make_shared<std::string>(s)).first;
		}
		return entry->second;
	}
};

struct serial_feature {
long long layer = 0;
int segment = 0;
Expand All @@ -95,7 +112,7 @@ struct serial_feature {
// to create the keys and values references into the string pool
// during initial serialization

std::vector<std::string> full_keys{};
std::vector<std::shared_ptr<std::string>> full_keys{};
std::vector<serial_val> full_values{};

// These fields are generated from full_keys and full_values
Expand Down
Loading

0 comments on commit ab95915

Please sign in to comment.