Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix serialization issue with schema evolution for query v3. #5154

Merged
merged 2 commits into from
Jul 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 22 additions & 56 deletions test/src/unit-cppapi-schema-evolution.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
* Tests the C++ API for schema evolution.
*/

#include <test/support/src/vfs_helpers.h>
#include <test/support/tdb_catch.h>
#include "test/support/src/mem_helpers.h"
#include "tiledb/sm/array_schema/array_schema.h"
Expand All @@ -47,12 +48,11 @@

TEST_CASE(
"C++ API: SchemaEvolution, add and drop attributes",
"[cppapi][schema][evolution][add][drop]") {
"[cppapi][schema][evolution][add][drop][rest]") {
using namespace tiledb;
Context ctx;
VFS vfs(ctx);

std::string array_uri = "test_schema_evolution_array";
test::VFSTestSetup vfs_test_setup;
Context ctx{vfs_test_setup.ctx()};
auto array_uri{vfs_test_setup.array_uri("test_schema_evolution_array")};

Domain domain(ctx);
auto id1 = Dimension::create<int>(ctx, "d1", {{-100, 100}}, 10);
Expand All @@ -71,10 +71,6 @@ TEST_CASE(
schema.set_cell_order(TILEDB_ROW_MAJOR);
schema.set_tile_order(TILEDB_COL_MAJOR);

if (vfs.is_dir(array_uri)) {
vfs.remove_dir(array_uri);
}

Array::create(array_uri, schema);

auto evolution = ArraySchemaEvolution(ctx);
Expand All @@ -100,21 +96,15 @@ TEST_CASE(
CHECK(attrs.count("a1") == 0);
CHECK(attrs.count("a2") == 1);
CHECK(attrs.count("a3") == 1);

// Clean up
if (vfs.is_dir(array_uri)) {
vfs.remove_dir(array_uri);
}
}

TEST_CASE(
"C++ API: SchemaEvolution, check error when dropping dimension",
"[cppapi][schema][evolution][drop]") {
"[cppapi][schema][evolution][drop][rest]") {
using namespace tiledb;
Context ctx;
VFS vfs(ctx);

std::string array_uri = "test_schema_evolution_array";
test::VFSTestSetup vfs_test_setup;
Context ctx{vfs_test_setup.ctx()};
auto array_uri{vfs_test_setup.array_uri("test_schema_evolution_array")};

Domain domain(ctx);
auto id1 = Dimension::create<int>(ctx, "d1", {{-100, 100}}, 10);
Expand All @@ -131,10 +121,6 @@ TEST_CASE(
schema.set_cell_order(TILEDB_ROW_MAJOR);
schema.set_tile_order(TILEDB_COL_MAJOR);

if (vfs.is_dir(array_uri)) {
vfs.remove_dir(array_uri);
}

Array::create(array_uri, schema);

auto evolution = ArraySchemaEvolution(ctx);
Expand All @@ -144,27 +130,22 @@ TEST_CASE(

// check that an exception is thrown
CHECK_THROWS(evolution.array_evolve(array_uri));

// Clean up
if (vfs.is_dir(array_uri)) {
vfs.remove_dir(array_uri);
}
}

TEST_CASE(
"C++ API: SchemaEvolution, add attributes and read",
"[cppapi][schema][evolution][add]") {
"[cppapi][schema][evolution][add][rest]") {
using namespace tiledb;
Context ctx;
VFS vfs(ctx);
test::VFSTestSetup vfs_test_setup;
Context ctx{vfs_test_setup.ctx()};

auto layout = GENERATE(
TILEDB_ROW_MAJOR,
TILEDB_COL_MAJOR,
TILEDB_UNORDERED,
TILEDB_GLOBAL_ORDER);
bool duplicates = GENERATE(true, false);

std::string array_uri = "test_schema_evolution_array_read";
auto array_uri{vfs_test_setup.array_uri("test_schema_evolution_array")};

// Create
{
Expand All @@ -183,10 +164,6 @@ TEST_CASE(
schema.set_cell_order(TILEDB_ROW_MAJOR);
schema.set_tile_order(TILEDB_COL_MAJOR);

if (vfs.is_dir(array_uri)) {
vfs.remove_dir(array_uri);
}

Array::create(array_uri, schema);
}

Expand Down Expand Up @@ -464,9 +441,11 @@ TEST_CASE(
// test case.
Config cfg;
cfg["sm.merge_overlapping_ranges_experimental"] = "false";
vfs_test_setup.update_config(cfg.ptr().get());
// + Global order does not support multi-range subarrays
if (layout != TILEDB_GLOBAL_ORDER) {
ctx = Context(cfg);
ctx = vfs_test_setup.ctx();

Array array(ctx, array_uri, TILEDB_READ);

std::vector<int> a_data(8);
Expand Down Expand Up @@ -683,19 +662,14 @@ TEST_CASE(
1, 1, 1, 1, 3, 3, 3, 3, 4, 4, 4, 4, 1, 1, 1, 1}));
}
}

// Clean up
if (vfs.is_dir(array_uri)) {
vfs.remove_dir(array_uri);
}
}

TEST_CASE(
"C++ API: SchemaEvolution, add and drop attributes",
"[cppapi][schema][evolution][add][query-condition]") {
"[cppapi][schema][evolution][add][query-condition][rest]") {
using namespace tiledb;
Context ctx;
VFS vfs(ctx);
test::VFSTestSetup vfs_test_setup;
Context ctx{vfs_test_setup.ctx()};
auto layout = GENERATE(
TILEDB_ROW_MAJOR,
TILEDB_COL_MAJOR,
Expand All @@ -705,7 +679,8 @@ TEST_CASE(

const char* out_str = nullptr;
tiledb_layout_to_str(layout, &out_str);
std::string array_uri = "test_schema_evolution_query_condition";
auto array_uri{
vfs_test_setup.array_uri("test_schema_evolution_query_condition")};

{
Domain domain(ctx);
Expand All @@ -723,10 +698,6 @@ TEST_CASE(
schema.set_cell_order(TILEDB_ROW_MAJOR);
schema.set_tile_order(TILEDB_COL_MAJOR);

if (vfs.is_dir(array_uri)) {
vfs.remove_dir(array_uri);
}

Array::create(array_uri, schema);
}

Expand Down Expand Up @@ -833,11 +804,6 @@ TEST_CASE(
CHECK_THAT(d1_data, Catch::Matchers::Equals(std::vector<int>{4}));
CHECK_THAT(d2_data, Catch::Matchers::Equals(std::vector<int>{1}));
}

// Cleanup.
if (vfs.is_dir(array_uri)) {
vfs.remove_dir(array_uri);
}
}

TEST_CASE(
Expand Down
2 changes: 1 addition & 1 deletion tiledb/sm/fragment/fragment_metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1349,7 +1349,7 @@ URI FragmentMetadata::validity_uri(const std::string& name) const {
encoded_name + "_validity" + constants::file_suffix);
}

const std::string& FragmentMetadata::array_schema_name() {
const std::string& FragmentMetadata::array_schema_name() const {
return array_schema_name_;
}

Expand Down
2 changes: 1 addition & 1 deletion tiledb/sm/fragment/fragment_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -640,7 +640,7 @@ class FragmentMetadata {
URI validity_uri(const std::string& name) const;

/** Return the array schema name. */
const std::string& array_schema_name();
const std::string& array_schema_name() const;

uint64_t footer_size() const;

Expand Down
17 changes: 15 additions & 2 deletions tiledb/sm/serialization/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,21 @@ void array_from_capnp(
&resources,
array->memory_tracker(),
frag_meta_reader.getVersion());
throw_if_not_ok(fragment_metadata_from_capnp(
array->array_schema_latest_ptr(), frag_meta_reader, meta));

auto schema = array->array_schema_latest_ptr();
if (frag_meta_reader.hasArraySchemaName()) {
auto fragment_array_schema_name =
frag_meta_reader.getArraySchemaName().cStr();
schema = array->array_schemas_all().at(fragment_array_schema_name);
} else if (array->array_schemas_all().size() > 1) {
throw ArraySerializationException(
"Cannot deserialize fragment metadata without an array schema name "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"in the case of arrays with evolved schemas.");
}

// pass the right schema to deserialize fragment metadata
throw_if_not_ok(
fragment_metadata_from_capnp(schema, frag_meta_reader, meta));
if (client_side) {
meta->loaded_metadata()->set_rtree_loaded();
}
Expand Down
23 changes: 14 additions & 9 deletions tiledb/sm/serialization/fragment_metadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ void generic_tile_offsets_from_capnp(
}

Status fragment_metadata_from_capnp(
const shared_ptr<const ArraySchema>& array_schema,
const shared_ptr<const ArraySchema>& fragment_array_schema,
const capnp::FragmentMetadata::Reader& frag_meta_reader,
shared_ptr<FragmentMetadata> frag_meta) {
if (frag_meta_reader.hasFileSizes()) {
Expand Down Expand Up @@ -145,7 +145,8 @@ Status fragment_metadata_from_capnp(
if (frag_meta_reader.hasFragmentUri()) {
// Reconstruct the fragment uri out of the received fragment name
frag_meta->fragment_uri() = deserialize_array_uri_to_absolute(
frag_meta_reader.getFragmentUri().cStr(), array_schema->array_uri());
frag_meta_reader.getFragmentUri().cStr(),
fragment_array_schema->array_uri());
}
frag_meta->has_timestamps() = frag_meta_reader.getHasTimestamps();
frag_meta->has_delete_meta() = frag_meta_reader.getHasDeleteMeta();
Expand All @@ -156,11 +157,13 @@ Status fragment_metadata_from_capnp(
frag_meta->version() = frag_meta_reader.getVersion();

// Set the array schema and most importantly retrigger the build
// of the internal idx_map. Also set array_schema_name which is used
// in some places in the global writer
frag_meta->set_array_schema(array_schema);
frag_meta->set_schema_name(array_schema->name());
frag_meta->set_dense(array_schema->dense());
// of the internal idx_map.
frag_meta->set_array_schema(fragment_array_schema);
frag_meta->set_dense(fragment_array_schema->dense());

if (frag_meta_reader.hasArraySchemaName()) {
frag_meta->set_schema_name(frag_meta_reader.getArraySchemaName().cStr());
}

LoadedFragmentMetadata::LoadedMetadata loaded_metadata;

Expand Down Expand Up @@ -367,7 +370,7 @@ Status fragment_metadata_from_capnp(

if (frag_meta_reader.hasRtree()) {
auto data = frag_meta_reader.getRtree();
auto& domain = array_schema->domain();
auto& domain = fragment_array_schema->domain();
// If there are no levels, we still need domain_ properly initialized
frag_meta->loaded_metadata()->rtree().reset(
&domain, constants::rtree_fanout);
Expand All @@ -391,7 +394,7 @@ Status fragment_metadata_from_capnp(
RETURN_NOT_OK(status);
// Whilst sparse gets its domain calculated, dense needs to have it
// set here from the deserialized data
if (array_schema->dense()) {
if (fragment_array_schema->dense()) {
frag_meta->init_domain(*ndrange);
} else {
const auto& frag0_dom = *ndrange;
Expand Down Expand Up @@ -699,6 +702,8 @@ Status fragment_metadata_to_capnp(
generic_tile_offsets_to_capnp(
frag_meta.generic_tile_offsets(), gt_offsets_builder);

frag_meta_builder->setArraySchemaName(frag_meta.array_schema_name());

return Status::Ok();
}

Expand Down
4 changes: 2 additions & 2 deletions tiledb/sm/serialization/fragment_metadata.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,15 @@ namespace serialization {
/**
* Convert Cap'n Proto message to Fragment Metadata
*
* @param array_schema the schema of the array the metadata belongs
* @param array_schema the schema of the fragment the metadata belongs
* @param frag_meta_reader cap'n proto class
* @param frag_meta fragment metadata object to deserialize into
* @param resources ContextResources associated
* @param memory_tracker memory tracker associated
* @return Status
*/
Status fragment_metadata_from_capnp(
const shared_ptr<const ArraySchema>& array_schema,
const shared_ptr<const ArraySchema>& fragment_array_schema,
const capnp::FragmentMetadata::Reader& frag_meta_reader,
shared_ptr<FragmentMetadata> frag_meta);

Expand Down
3 changes: 3 additions & 0 deletions tiledb/sm/serialization/tiledb-rest.capnp
Original file line number Diff line number Diff line change
Expand Up @@ -1163,6 +1163,9 @@ struct FragmentMetadata {

gtOffsets @28 :GenericTileOffsets;
# the start offsets of the generic tiles stored in the metadata file

arraySchemaName @29 :Text;
# array schema name
}

struct MultiPartUploadState {
Expand Down
Loading
Loading