Skip to content

Commit

Permalink
Remove serialization non C41 constructors from stats
Browse files Browse the repository at this point in the history
With this change in, PR #4685 and others to follow should:

a) remove Subarray::set_stats and respective call from serialization/query.cc
b) where Subarray is constructed in serialization/query.cc, change constructor
call with the following code snippet:
 auto &stats_data = stats_from_capnp(reader.getStats());
 Subarray subarray(array, layout, query_stats, stats_data, dummy_logger, true);
c) The constructor calls parent_stats->create_child(prefix, stats_data);
d) When all migrations are done, make Stats::populate_with_data private
  • Loading branch information
robertbindar committed Feb 8, 2024
1 parent 47b3eb8 commit 5590ded
Show file tree
Hide file tree
Showing 12 changed files with 192 additions and 59 deletions.
4 changes: 4 additions & 0 deletions tiledb/sm/query/query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1663,6 +1663,10 @@ stats::Stats* Query::stats() const {
return stats_;
}

void Query::set_stats(const stats::StatsData& data) {
stats_->populate_with_data(data);
}

shared_ptr<Buffer> Query::rest_scratch() const {
return rest_scratch_;
}
Expand Down
5 changes: 5 additions & 0 deletions tiledb/sm/query/query.h
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,11 @@ class Query {
/** Returns the internal stats object. */
stats::Stats* stats() const;

/** Populate the owned stats instance with data.
* To be removed when the class will get a C41 constructor.
*/
void set_stats(const stats::StatsData& data);

/** Returns the scratch space used for REST requests. */
shared_ptr<Buffer> rest_scratch() const;

Expand Down
4 changes: 4 additions & 0 deletions tiledb/sm/query/strategy_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ stats::Stats* StrategyBase::stats() const {
return stats_;
}

void StrategyBase::set_stats(const stats::StatsData& data) {
stats_->populate_with_data(data);
}

/* ****************************** */
/* PROTECTED METHODS */
/* ****************************** */
Expand Down
5 changes: 5 additions & 0 deletions tiledb/sm/query/strategy_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,11 @@ class StrategyBase {
/** Returns `stats_`. */
stats::Stats* stats() const;

/** Populate the owned stats instance with data.
* To be removed when the class will get a C41 constructor.
*/
void set_stats(const stats::StatsData& data);

/** Returns the configured offsets format mode. */
std::string offsets_mode() const;

Expand Down
77 changes: 25 additions & 52 deletions tiledb/sm/serialization/query.cc
Original file line number Diff line number Diff line change
Expand Up @@ -118,27 +118,27 @@ Status stats_to_capnp(Stats& stats, capnp::Stats::Builder* stats_builder) {
return Status::Ok();
}

Status stats_from_capnp(
const capnp::Stats::Reader& stats_reader, Stats* stats) {
StatsData stats_from_capnp(const capnp::Stats::Reader& stats_reader) {
std::unordered_map<std::string, uint64_t> counters;
std::unordered_map<std::string, double> timers;

if (stats_reader.hasCounters()) {
auto counters = stats->counters();
auto counters_reader = stats_reader.getCounters();
for (const auto entry : counters_reader.getEntries()) {
auto key = std::string_view{entry.getKey().cStr(), entry.getKey().size()};
(*counters)[std::string{key}] = entry.getValue();
counters[std::string(key)] = entry.getValue();
}
}

if (stats_reader.hasTimers()) {
auto timers = stats->timers();
auto timers_reader = stats_reader.getTimers();
for (const auto entry : timers_reader.getEntries()) {
auto key = std::string_view{entry.getKey().cStr(), entry.getKey().size()};
(*timers)[std::string{key}] = entry.getValue();
timers[std::string(key)] = entry.getValue();
}
}

return Status::Ok();
return stats::StatsData(counters, timers);
}

void range_buffers_to_capnp(
Expand Down Expand Up @@ -328,11 +328,8 @@ Status subarray_from_capnp(

// If cap'n proto object has stats set it on c++ object
if (reader.hasStats()) {
stats::Stats* stats = subarray->stats();
// We should always have a stats here
if (stats != nullptr) {
RETURN_NOT_OK(stats_from_capnp(reader.getStats(), stats));
}
auto stats_data = stats_from_capnp(reader.getStats());
subarray->set_stats(stats_data);
}

if (reader.hasRelevantFragments()) {
Expand Down Expand Up @@ -566,11 +563,8 @@ Status subarray_partitioner_from_capnp(

// If cap'n proto object has stats set it on c++ object
if (reader.hasStats()) {
auto stats = partitioner->stats();
// We should always have stats
if (stats != nullptr) {
RETURN_NOT_OK(stats_from_capnp(reader.getStats(), stats));
}
auto stats_data = stats_from_capnp(reader.getStats());
partitioner->set_stats(stats_data);
}

return Status::Ok();
Expand Down Expand Up @@ -1146,11 +1140,8 @@ Status reader_from_capnp(

// If cap'n proto object has stats set it on c++ object
if (reader_reader.hasStats()) {
stats::Stats* stats = reader->stats();
// We should always have a stats here
if (stats != nullptr) {
RETURN_NOT_OK(stats_from_capnp(reader_reader.getStats(), stats));
}
auto stats_data = stats_from_capnp(reader_reader.getStats());
reader->set_stats(stats_data);
}

return Status::Ok();
Expand Down Expand Up @@ -1187,11 +1178,8 @@ Status index_reader_from_capnp(

// If cap'n proto object has stats set it on c++ object
if (reader_reader.hasStats()) {
stats::Stats* stats = reader->stats();
// We should always have a stats here
if (stats != nullptr) {
RETURN_NOT_OK(stats_from_capnp(reader_reader.getStats(), stats));
}
auto stats_data = stats_from_capnp(reader_reader.getStats());
reader->set_stats(stats_data);
}

return Status::Ok();
Expand Down Expand Up @@ -1229,11 +1217,8 @@ Status dense_reader_from_capnp(

// If cap'n proto object has stats set it on c++ object
if (reader_reader.hasStats()) {
stats::Stats* stats = reader->stats();
// We should always have a stats here
if (stats != nullptr) {
RETURN_NOT_OK(stats_from_capnp(reader_reader.getStats(), stats));
}
auto stats_data = stats_from_capnp(reader_reader.getStats());
reader->set_stats(stats_data);
}

return Status::Ok();
Expand All @@ -1252,11 +1237,8 @@ Status delete_from_capnp(

// If cap'n proto object has stats set it on c++ object
if (delete_reader.hasStats()) {
stats::Stats* stats = delete_strategy->stats();
// We should always have a stats here
if (stats != nullptr) {
RETURN_NOT_OK(stats_from_capnp(delete_reader.getStats(), stats));
}
auto stats_data = stats_from_capnp(delete_reader.getStats());
delete_strategy->set_stats(stats_data);
}

return Status::Ok();
Expand Down Expand Up @@ -1341,11 +1323,8 @@ Status writer_from_capnp(

// If cap'n proto object has stats set it on c++ object
if (writer_reader.hasStats()) {
stats::Stats* stats = writer->stats();
// We should always have a stats here
if (stats != nullptr) {
RETURN_NOT_OK(stats_from_capnp(writer_reader.getStats(), stats));
}
auto stats_data = stats_from_capnp(writer_reader.getStats());
writer->set_stats(stats_data);
}

if (query.layout() == Layout::GLOBAL_ORDER &&
Expand Down Expand Up @@ -2270,11 +2249,8 @@ Status query_from_capnp(

// If cap'n proto object has stats set it on c++ object
if (query_reader.hasStats()) {
stats::Stats* stats = query->stats();
// We should always have a stats here
if (stats != nullptr) {
RETURN_NOT_OK(stats_from_capnp(query_reader.getStats(), stats));
}
auto stats_data = stats_from_capnp(query_reader.getStats());
query->set_stats(stats_data);
}

if (query_reader.hasWrittenFragmentInfo()) {
Expand Down Expand Up @@ -3212,11 +3188,8 @@ void ordered_dim_label_reader_from_capnp(

// If cap'n proto object has stats set it on c++ object
if (reader_reader.hasStats()) {
stats::Stats* stats = reader->stats();
// We should always have a stats here
if (stats != nullptr) {
throw_if_not_ok(stats_from_capnp(reader_reader.getStats(), stats));
}
auto stats_data = stats_from_capnp(reader_reader.getStats());
reader->set_stats(stats_data);
}
}

Expand Down
68 changes: 66 additions & 2 deletions tiledb/sm/stats/global_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,71 @@
#include "tiledb/common/heap_memory.h"
#include "tiledb/sm/stats/stats.h"

/**
* Documenting the current stats behavior and architecture as close as
* possible to the code so it's helpful next time some tries to refactor.
*
* Statistics collection is done at the top level via the GlobalStats class
* defined in this file.
* We maintain a global object called `all_stats` which is used to register
* Stats objects, enable/disable collection, reset or dumping the collected
* stats.
*
* The TileDB C API uses the `all_stats` object directly to execute the
* actions iterated above.
*
* The GlobalStats class owns a list called `registered_stats` that has one
* Stats object registered for each Context used. In consequence,
* ContextResources register a Stats object for each Context created, this
* object serves as the root for the tree of all children Stats used in a
* Context.
*
* As mentioned above, the Stats objects used under a Context form a tree.
* Each Stats object mentains a list of children Stats and a pointer to the
* parent Stats object.
* The Stats object created by ContextResources(named "Context.StorageManager")
* is the only Stats constructed in a standalone fashion using the Stats
* constructor, all the other objects under this root Stats are created via
* the Stats::create_child API.
*
* The (current, please update if not accurate anymore) exhaustive list of
* Stats we maintain under a Context is:
* ---------------------------
* ContextResources
* - Query
* - Reader
* - Writer
* - DenseTiler
* - Subarray
* - Deletes
* - Subarray
* - subSubarray
* - SubarrayPartitioner
* - VFS
* - S3
* - ArrayDirectory
* - RestClient
* - Consolidator
* ---------------------------
* Please visualize this as a tree, it was much easier to write
* like this, the tree is too wide.
*
*
* Observed issues:
* - Stats objects currently are created via Stats::create_child API from a
* parent stats object. Child objects such as e.g. Subarray only hold a
* pointer to the Stats object, this means that the Stats objects outlive
* the objects they represent and are kept alive by the tree like structure
* defined by the Stats class.
* In theory, a Context running for a long time would OOM the machine with
* Stats objects.
*
* - Stats::populate_flattened_stats aggregate the collected statistic via
* summing. But we also collect ".min", ".max" statistics as well,
* sum-aggregating those is incorrect. Currently the dump function just
* doesn't print those statistics.
*/

namespace tiledb {
namespace sm {
namespace stats {
Expand Down Expand Up @@ -158,8 +223,7 @@ class GlobalStats {
/* ********************************* */

/**
* The singleton instance holding all global stats counters. The report will
* be automatically made when this object is destroyed (at program termination).
* The singleton instance holding all global stats counters and timers.
*/
extern GlobalStats all_stats;

Expand Down
26 changes: 23 additions & 3 deletions tiledb/sm/stats/stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,14 @@ namespace stats {
/* ****************************** */

Stats::Stats(const std::string& prefix)
: Stats(prefix, StatsData{}) {
}

Stats::Stats(const std::string& prefix, const StatsData& data)
: enabled_(true)
, prefix_(prefix + ".")
, parent_(nullptr) {
this->populate_with_data(data);
}

/* ****************************** */
Expand Down Expand Up @@ -246,8 +251,12 @@ Stats* Stats::parent() {
}

Stats* Stats::create_child(const std::string& prefix) {
return create_child(prefix, StatsData{});
}

Stats* Stats::create_child(const std::string& prefix, const StatsData& data) {
std::unique_lock<std::mutex> lck(mtx_);
children_.emplace_back(prefix_ + prefix);
children_.emplace_back(prefix_ + prefix, data);
Stats* const child = &children_.back();
child->parent_ = this;
return child;
Expand All @@ -272,15 +281,26 @@ void Stats::populate_flattened_stats(
}
}

std::unordered_map<std::string, double>* Stats::timers() {
const std::unordered_map<std::string, double>* Stats::timers() const {
return &timers_;
}

/** Return pointer to conters map, used for serialization only. */
std::unordered_map<std::string, uint64_t>* Stats::counters() {
const std::unordered_map<std::string, uint64_t>* Stats::counters() const {
return &counters_;
}

void Stats::populate_with_data(const StatsData& data) {
auto timers = data.timers();
for (const auto& timer : timers) {
timers_[timer.first] = timer.second;
}
auto counters = data.counters();
for (const auto& counter : counters) {
counters_[counter.first] = counter.second;
}
}

} // namespace stats
} // namespace sm
} // namespace tiledb
Loading

0 comments on commit 5590ded

Please sign in to comment.