From 7e6d359fc10ed3a0eb409b859ee0362d20545549 Mon Sep 17 00:00:00 2001 From: Shaun Reed Date: Tue, 30 Jan 2024 16:29:28 -0500 Subject: [PATCH] Add C.41 Subarray constructor + Unstatus subarray_from_capnp --- test/support/src/serialization_wrappers.cc | 10 +- tiledb/sm/serialization/query.cc | 151 ++++++++++++--------- tiledb/sm/serialization/query.h | 8 +- tiledb/sm/subarray/subarray.cc | 26 ++++ tiledb/sm/subarray/subarray.h | 111 +++++++++------ 5 files changed, 197 insertions(+), 109 deletions(-) diff --git a/test/support/src/serialization_wrappers.cc b/test/support/src/serialization_wrappers.cc index fa7dec6fc129..0973739a67a0 100644 --- a/test/support/src/serialization_wrappers.cc +++ b/test/support/src/serialization_wrappers.cc @@ -204,11 +204,15 @@ void tiledb_subarray_serialize( .ok()); // Deserialize tiledb_subarray_t* deserialized_subarray; + auto layout = (*subarray)->subarray_->layout(); + auto stats = (*subarray)->subarray_->stats(); + shared_ptr dummy_logger = make_shared(HERE(), ""); + tiledb::test::require_tiledb_ok( ctx, tiledb_subarray_alloc(ctx, array, &deserialized_subarray)); - REQUIRE(tiledb::sm::serialization::subarray_from_capnp( - builder, deserialized_subarray->subarray_) - .ok()); + *deserialized_subarray->subarray_ = + tiledb::sm::serialization::subarray_from_capnp( + builder, array->array_.get(), layout, stats, dummy_logger); *subarray = deserialized_subarray; #endif } diff --git a/tiledb/sm/serialization/query.cc b/tiledb/sm/serialization/query.cc index 8a716ca82b74..ec81144a5493 100644 --- a/tiledb/sm/serialization/query.cc +++ b/tiledb/sm/serialization/query.cc @@ -265,39 +265,61 @@ Status subarray_to_capnp( return Status::Ok(); } -Status subarray_from_capnp( - const capnp::Subarray::Reader& reader, Subarray* subarray) { - RETURN_NOT_OK(subarray->set_coalesce_ranges(reader.getCoalesceRanges())); +Subarray subarray_from_capnp( + const capnp::Subarray::Reader& reader, + const Array* array, + Layout layout, + stats::Stats* parent_stats, + shared_ptr logger) { + bool coalesce_ranges = reader.getCoalesceRanges(); auto ranges_reader = reader.getRanges(); + uint32_t dim_num = ranges_reader.size(); + std::vector range_subset(dim_num); + std::vector is_default(dim_num, false); for (uint32_t i = 0; i < dim_num; i++) { auto range_reader = ranges_reader[i]; Datatype type = Datatype::UINT8; - RETURN_NOT_OK(datatype_enum(range_reader.getType(), &type)); + throw_if_not_ok(datatype_enum(range_reader.getType(), &type)); + auto dim = array->array_schema_latest().dimension_ptr(i); - auto data = range_reader.getBuffer(); - auto data_ptr = data.asBytes(); + bool implicitly_initialized = range_reader.getHasDefaultRange(); + range_subset[i] = + RangeSetAndSuperset(dim->type(), dim->domain(), true, coalesce_ranges); + is_default[i] = implicitly_initialized; if (range_reader.hasBufferSizes()) { auto ranges = range_buffers_from_capnp(range_reader); - RETURN_NOT_OK(subarray->set_ranges_for_dim(i, ranges)); - - // Set default indicator - subarray->set_is_default(i, range_reader.getHasDefaultRange()); + // If the range is implicitly initialized, the RangeSetAndSuperset + // constructor will initialize the ranges to the domain. + if (!implicitly_initialized) { + // Edge case for dimension labels where there are only label ranges set. + if (ranges.empty()) { + range_subset[i] = RangeSetAndSuperset( + dim->type(), dim->domain(), false, coalesce_ranges); + } + // Add custom ranges, clearing any implicit ranges previously set. + for (const auto& range : ranges) { + throw_if_not_ok(range_subset[i].add_range_unrestricted(range)); + } + } } else { // Handle 1.7 style ranges where there is a single range with no sizes + auto data = range_reader.getBuffer(); + auto data_ptr = data.asBytes(); Range range(data_ptr.begin(), data.size()); - RETURN_NOT_OK(subarray->set_ranges_for_dim(i, {range})); - subarray->set_is_default(i, range_reader.getHasDefaultRange()); + throw_if_not_ok(range_subset[i].add_range_unrestricted(range)); } } + std::vector> label_range_subset( + dim_num, nullopt); if (reader.hasLabelRanges()) { - subarray->add_default_label_ranges(dim_num); auto label_ranges_reader = reader.getLabelRanges(); uint32_t label_num = label_ranges_reader.size(); for (uint32_t i = 0; i < label_num; i++) { auto label_range_reader = label_ranges_reader[i]; - auto dim_id = label_range_reader.getDimensionId(); + auto dim_index = label_range_reader.getDimensionId(); + auto dim = array->array_schema_latest().dimension_ptr(dim_index); auto label_name = label_range_reader.getName(); // Deserialize ranges for this dim label @@ -305,49 +327,50 @@ Status subarray_from_capnp( auto ranges = range_buffers_from_capnp(range_reader); // Set ranges for this dim label on the subarray - subarray->set_label_ranges_for_dim(dim_id, label_name, ranges); + label_range_subset[dim_index] = { + label_name, dim->type(), coalesce_ranges}; } } + std::unordered_map> attr_range_subset; if (reader.hasAttributeRanges()) { - std::unordered_map> attr_ranges; auto attr_ranges_reader = reader.getAttributeRanges(); if (attr_ranges_reader.hasEntries()) { - for (auto attr_ranges_entry : attr_ranges_reader.getEntries()) { - auto range_reader = attr_ranges_entry.getValue(); - auto key = std::string_view{ - attr_ranges_entry.getKey().cStr(), - attr_ranges_entry.getKey().size()}; - attr_ranges[std::string{key}] = range_buffers_from_capnp(range_reader); + for (auto entry : attr_ranges_reader.getEntries()) { + auto range_reader = entry.getValue(); + std::string key{entry.getKey().cStr(), entry.getKey().size()}; + attr_range_subset[key] = range_buffers_from_capnp(range_reader); } } - - for (const auto& attr_range : attr_ranges) - subarray->set_attribute_ranges(attr_range.first, attr_range.second); } // If cap'n proto object has stats set it on c++ object + Subarray s(array, layout, parent_stats, logger, true); if (reader.hasStats()) { - stats::Stats* stats = subarray->stats(); - // We should always have a stats here - if (stats != nullptr) { - RETURN_NOT_OK(stats_from_capnp(reader.getStats(), stats)); - } + throw_if_not_ok(stats_from_capnp(reader.getStats(), s.stats())); } + std::vector relevant_fragments; if (reader.hasRelevantFragments()) { - auto relevant_fragments = reader.getRelevantFragments(); - size_t count = relevant_fragments.size(); - std::vector rf; - rf.reserve(count); + auto reader_rf = reader.getRelevantFragments(); + size_t count = reader_rf.size(); + relevant_fragments.reserve(count); for (size_t i = 0; i < count; i++) { - rf.emplace_back(relevant_fragments[i]); + relevant_fragments.emplace_back(reader_rf[i]); } - - subarray->relevant_fragments() = RelevantFragments(rf); } - return Status::Ok(); + return { + array, + layout, + reader.hasStats() ? s.stats() : parent_stats, + logger, + range_subset, + is_default, + label_range_subset, + attr_range_subset, + relevant_fragments, + coalesce_ranges}; } Status subarray_partitioner_to_capnp( @@ -461,8 +484,8 @@ Status subarray_partitioner_from_capnp( RETURN_NOT_OK(layout_enum(subarray_reader.getLayout(), &layout)); // Subarray, which is used to initialize the partitioner. - Subarray subarray(array, layout, query_stats, dummy_logger, true); - RETURN_NOT_OK(subarray_from_capnp(reader.getSubarray(), &subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query_stats, dummy_logger); *partitioner = SubarrayPartitioner( &config, subarray, @@ -519,10 +542,12 @@ Status subarray_partitioner_from_capnp( partition_info->end_ = partition_info_reader.getEnd(); partition_info->split_multi_range_ = partition_info_reader.getSplitMultiRange(); - partition_info->partition_ = - Subarray(array, layout, query_stats, dummy_logger, true); - RETURN_NOT_OK(subarray_from_capnp( - partition_info_reader.getSubarray(), &partition_info->partition_)); + partition_info->partition_ = subarray_from_capnp( + partition_info_reader.getSubarray(), + array, + layout, + query_stats, + dummy_logger); if (compute_current_tile_overlap) { throw_if_not_ok(partition_info->partition_.precompute_tile_overlap( @@ -542,20 +567,18 @@ Status subarray_partitioner_from_capnp( auto sr_reader = state_reader.getSingleRange(); const unsigned num_sr = sr_reader.size(); for (unsigned i = 0; i < num_sr; i++) { - auto subarray_reader_ = sr_reader[i]; - state->single_range_.emplace_back( - array, layout, query_stats, dummy_logger, true); - Subarray& subarray_ = state->single_range_.back(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader_, &subarray_)); + auto subarray_reader = sr_reader[i]; + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query_stats, dummy_logger); + state->single_range_.push_back(subarray); } auto m_reader = state_reader.getMultiRange(); const unsigned num_m = m_reader.size(); for (unsigned i = 0; i < num_m; i++) { - auto subarray_reader_ = m_reader[i]; - state->multi_range_.emplace_back( - array, layout, query_stats, dummy_logger, true); - Subarray& subarray_ = state->multi_range_.back(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader_, &subarray_)); + auto subarray_reader = m_reader[i]; + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query_stats, dummy_logger); + state->multi_range_.push_back(subarray); } // Overall mem budget @@ -1131,9 +1154,9 @@ Status reader_from_capnp( RETURN_NOT_OK(layout_enum(reader_reader.getLayout(), &layout)); // Subarray - Subarray subarray(array, layout, query->stats(), dummy_logger, true); auto subarray_reader = reader_reader.getSubarray(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query->stats(), dummy_logger); RETURN_NOT_OK(query->set_subarray_unsafe(subarray)); // Read state @@ -1172,9 +1195,9 @@ Status index_reader_from_capnp( RETURN_NOT_OK(layout_enum(reader_reader.getLayout(), &layout)); // Subarray - Subarray subarray(array, layout, query->stats(), dummy_logger, true); auto subarray_reader = reader_reader.getSubarray(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query->stats(), dummy_logger); RETURN_NOT_OK(query->set_subarray_unsafe(subarray)); // Read state @@ -1214,9 +1237,9 @@ Status dense_reader_from_capnp( RETURN_NOT_OK(layout_enum(reader_reader.getLayout(), &layout)); // Subarray - Subarray subarray(array, layout, query->stats(), dummy_logger, true); auto subarray_reader = reader_reader.getSubarray(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query->stats(), dummy_logger); RETURN_NOT_OK(query->set_subarray_unsafe(subarray)); // Read state @@ -2253,9 +2276,9 @@ Status query_from_capnp( // Subarray if (writer_reader.hasSubarrayRanges()) { - Subarray subarray(array, layout, query->stats(), dummy_logger, true); auto subarray_reader = writer_reader.getSubarrayRanges(); - RETURN_NOT_OK(subarray_from_capnp(subarray_reader, &subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query->stats(), dummy_logger); RETURN_NOT_OK(query->set_subarray_unsafe(subarray)); } } @@ -3203,9 +3226,9 @@ void ordered_dim_label_reader_from_capnp( throw_if_not_ok(layout_enum(reader_reader.getLayout(), &layout)); // Subarray - Subarray subarray(array, layout, query->stats(), dummy_logger, false); auto subarray_reader = reader_reader.getSubarray(); - throw_if_not_ok(subarray_from_capnp(subarray_reader, &subarray)); + Subarray subarray = subarray_from_capnp( + subarray_reader, array, layout, query->stats(), dummy_logger); throw_if_not_ok(query->set_subarray_unsafe(subarray)); // OrderedDimLabelReader requires an initialized subarray for construction. diff --git a/tiledb/sm/serialization/query.h b/tiledb/sm/serialization/query.h index 1a711db6b7de..268a5a5ad9aa 100644 --- a/tiledb/sm/serialization/query.h +++ b/tiledb/sm/serialization/query.h @@ -251,8 +251,12 @@ Status subarray_to_capnp( const Subarray* subarray, capnp::Subarray::Builder* builder); -Status subarray_from_capnp( - const capnp::Subarray::Reader& reader, Subarray* subarray); +Subarray subarray_from_capnp( + const capnp::Subarray::Reader& reader, + const Array* array, + Layout layout, + stats::Stats* parent_stats, + shared_ptr logger); void ordered_dim_label_reader_to_capnp( const Query& query, diff --git a/tiledb/sm/subarray/subarray.cc b/tiledb/sm/subarray/subarray.cc index 4f4d6e62f5e5..d9d87b797c9a 100644 --- a/tiledb/sm/subarray/subarray.cc +++ b/tiledb/sm/subarray/subarray.cc @@ -150,6 +150,32 @@ Subarray::Subarray( add_default_ranges(); } +Subarray::Subarray( + const Array* array, + Layout layout, + stats::Stats* stats, + shared_ptr logger, + std::vector range_subset, + std::vector is_default, + std::vector> label_range_subset, + std::unordered_map> attr_range_subset, + std::vector relevant_fragments, + bool coalesce_ranges) + : stats_(stats) + , logger_(std::move(logger)) + , array_(array->opened_array()) + , layout_(layout) + , cell_order_(array_->array_schema_latest().cell_order()) + , range_subset_(std::move(range_subset)) + , label_range_subset_(std::move(label_range_subset)) + , attr_range_subset_(std::move(attr_range_subset)) + , is_default_(std::move(is_default)) + , est_result_size_computed_(false) + , relevant_fragments_(relevant_fragments) + , coalesce_ranges_(coalesce_ranges) + , ranges_sorted_(false) { +} + Subarray::Subarray(const Subarray& subarray) : Subarray() { // Make a deep-copy clone diff --git a/tiledb/sm/subarray/subarray.h b/tiledb/sm/subarray/subarray.h index 4b2fa5563f09..6f4ff939a13d 100644 --- a/tiledb/sm/subarray/subarray.h +++ b/tiledb/sm/subarray/subarray.h @@ -205,6 +205,46 @@ class Subarray { uint64_t size_validity_; }; + /** + * Wrapper for optional> for + * cleaner data access. + */ + struct LabelRangeSubset { + public: + /** + * Default constructor is not C.41. + **/ + LabelRangeSubset() = delete; + + /** + * Constructor + * + * @param ref Dimension label the ranges will be set on. + * @param coalesce_ranges Set if ranges should be combined when adjacent. + */ + LabelRangeSubset(const DimensionLabel& ref, bool coalesce_ranges = true); + + /** + * Constructor + * + * @param name The name of the dimension label the ranges will be set on. + * @param type The type of the label the ranges will be set on. + * @param coalesce_ranges Set if ranges should be combined when adjacent. + */ + LabelRangeSubset( + const std::string& name, Datatype type, bool coalesce_ranges = true); + + inline const std::vector& get_ranges() const { + return ranges_.ranges(); + } + + /** Name of the dimension label. */ + std::string name_; + + /** The ranges set on the dimension label. */ + RangeSetAndSuperset ranges_; + }; + /* ********************************* */ /* CONSTRUCTORS & DESTRUCTORS */ /* ********************************* */ @@ -268,6 +308,37 @@ class Subarray { bool coalesce_ranges = true, StorageManager* storage_manager = nullptr); + /** + * Constructor. + * + * @param opened_array The opened array the subarray is associated with. + * @param layout The layout of the values of the subarray (of the results + * if the subarray is used for reads, or of the values provided + * by the user for writes). + * @param parent_stats The parent stats to inherit from. + * @param logger The parent logger to clone and use for logging + * @param range_subset Vector of RangeSetAndSuperset for each dimension. + * @param is_default Vector of boolean indicating if the range is default. + * @param label_range_subset Vector of optional for each + * dimension. + * @param attr_range_subset Map of attribute name to a vector of Ranges, for + * each attribute. + * @param relevant_fragments RelevantFragments object for the subarray. + * @param coalesce_ranges When enabled, ranges will attempt to coalesce + * with existing ranges as they are added + */ + Subarray( + const Array* array, + Layout layout, + stats::Stats* stats, + shared_ptr logger, + std::vector range_subset, + std::vector is_default, + std::vector> label_range_subset, + std::unordered_map> attr_range_subset, + std::vector relevant_fragments, + bool coalesce_ranges = true); + /** * Copy constructor. This performs a deep copy (including memcpy of * underlying buffers). @@ -1332,46 +1403,6 @@ class Subarray { uint64_t range_len_; }; - /** - * Wrapper for optional> for - * cleaner data access. - */ - struct LabelRangeSubset { - public: - /** - * Default constructor is not C.41. - **/ - LabelRangeSubset() = delete; - - /** - * Constructor - * - * @param ref Dimension label the ranges will be set on. - * @param coalesce_ranges Set if ranges should be combined when adjacent. - */ - LabelRangeSubset(const DimensionLabel& ref, bool coalesce_ranges = true); - - /** - * Constructor - * - * @param name The name of the dimension label the ranges will be set on. - * @param type The type of the label the ranges will be set on. - * @param coalesce_ranges Set if ranges should be combined when adjacent. - */ - LabelRangeSubset( - const std::string& name, Datatype type, bool coalesce_ranges = true); - - inline const std::vector& get_ranges() const { - return ranges_.ranges(); - } - - /** Name of the dimension label. */ - std::string name_; - - /** The ranges set on the dimension label. */ - RangeSetAndSuperset ranges_; - }; - /** * A hash function capable of hashing std::vector for use by * the tile_coords_map_ unordered_map for caching coords indices.