Bugfix: Empty type #1227

Merged
36 commits merged on Feb 20, 2024
Changes from 34 commits
e8e5fae
Add src and dest type descriptors to decode or expand. Add backfilling.
Jan 10, 2024
0ab4c22
Fix string backfills in empty type. Add test cases for all types to c…
Jan 11, 2024
d350261
Add a test for appending none to int
Jan 15, 2024
8eb7253
Another batch of fixes for the empty data type
Jan 22, 2024
840d899
Use boost span for flatten_and_fix_rows
Jan 25, 2024
1c97adf
Use TypeDescriptor constructor instead of static_cast
Jan 25, 2024
5638585
Use truncate columns on update and fix pybool type handler so that if…
Jan 29, 2024
0efb037
Complete testing for updating with empty type column
Jan 30, 2024
f25b788
Additional test
Jan 30, 2024
17b642c
FWD declare buffer holder
Jan 31, 2024
0334f45
Add a single function to handle default initialization.
Feb 1, 2024
e3d71cb
Change pybool type name to the language-agnostic name bool_object
Feb 1, 2024
907f103
Merge branch 'master' into empty-type-fixes
Feb 1, 2024
868c607
Skip nullable booleans in empty types tests
Feb 1, 2024
5f244ca
Fix compiler warnings
Feb 1, 2024
3d8c784
Work on matching compiler flags
Feb 2, 2024
8d0484a
Add tests for updating an empty column
Feb 2, 2024
6343472
Fix conda builds
Feb 2, 2024
0a36fe6
Fix MSVC release builds
Feb 5, 2024
98687fa
Merge branch 'master' into empty-type-fixes
Feb 5, 2024
093e4f6
More MSVC compilation error fixes
Feb 5, 2024
ff4bcdd
Fix compilation error in MSVC for C++ unit tests
Feb 5, 2024
c2ef852
Fix rewrite partial segment so that it doesn't try to truncate with e…
Feb 9, 2024
507fdaa
Merge branch 'master' into empty-type-fixes-2
Feb 9, 2024
c6c212f
Fix string reducer for static schema when empty segment part of a str…
Feb 14, 2024
b3d477f
Merge branch 'master' into empty-type-fixes
Feb 14, 2024
224916d
Add comment on how pandas handles None when dtype is bool
Feb 14, 2024
a18f2b2
Use std::array for flatten_and_fix_rows instead of boost::span
Feb 14, 2024
3033303
Refactor string pool reconstruction in segment truncation as per PR c…
Feb 14, 2024
0d0c31e
Fix compilation errors under linux
Feb 14, 2024
5684779
Prevent creation of sparse map for empty typed columns
Feb 15, 2024
275b62d
Use Column::transform when reconstructing the string pool
Feb 16, 2024
e0d2c8d
Merge branch 'master' into empty-type-fixes-2
Feb 16, 2024
e6e3e23
Fix MSVC compilation errors
Feb 16, 2024
aeff5a2
Fix string pool reconstruction by adding checks for is_a_string
Feb 19, 2024
380663f
Merge branch 'master' into empty-type-fixes
Feb 19, 2024
14 changes: 12 additions & 2 deletions cpp/arcticdb/CMakeLists.txt
@@ -157,7 +157,7 @@ ENDIF()

set(CMAKE_INSTALL_RPATH "\\$ORIGIN/lib")

if (NOT WIN32)
if (NOT MSVC)
if(NOT ${ARCTICDB_USING_CONDA})
# Compilers on conda-forge are relatively strict.
# For now, we do not want to treat warnings as errors for those builds.
@@ -166,6 +166,13 @@ if (NOT WIN32)
add_compile_options("-ggdb")
add_compile_options("-fvisibility=hidden")
add_compile_options("-fno-omit-frame-pointer")
else()
add_compile_options(
/WX
/wd4250 # Suppress inherits via dominance warning
/we4189 # Unused local variable is an error
/we4100 # Unused function parameter is an error
)
endif ()

## Core library without python bindings ##
@@ -200,6 +207,7 @@ set(arcticdb_srcs
entity/key.hpp
entity/metrics.hpp
entity/data_error.hpp
entity/descriptors.hpp
entity/native_tensor.hpp
entity/performance_tracing.hpp
entity/protobuf_mappings.hpp
@@ -208,6 +216,7 @@ set(arcticdb_srcs
entity/serialized_key.hpp
entity/type_conversion.hpp
entity/types.hpp
entity/type_utils.hpp
entity/types-inl.hpp
entity/variant_key.hpp
entity/versioned_item.hpp
@@ -233,7 +242,6 @@ set(arcticdb_srcs
pipeline/read_pipeline.hpp
pipeline/slicing.hpp
pipeline/string_pool_utils.hpp
pipeline/string_pool_utils.hpp
pipeline/value.hpp
pipeline/value_set.hpp
pipeline/write_frame.hpp
@@ -376,7 +384,9 @@ set(arcticdb_srcs
entity/metrics.cpp
entity/performance_tracing.cpp
entity/types.cpp
entity/type_utils.cpp
log/log.cpp
pipeline/column_mapping.cpp
pipeline/column_stats.cpp
pipeline/frame_slice.cpp
pipeline/frame_utils.cpp
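The MSVC options added earlier in this file's diff turn warnings into errors (/WX) and specifically promote unused locals (C4189) and unused parameters (C4100) to errors. A minimal sketch, not taken from the diff, of the kind of code this now rejects and the `[[maybe_unused]]` fix that the PR applies in the same style to `ZstdDecoder::decode_block` in zstd.hpp:

```cpp
// Minimal sketch (illustrative only, not part of the diff) of what the
// stricter MSVC flags reject and how the PR-style fix looks.
#include <cstdint>

// Under /we4100 this definition is an error, because encoder_version is
// accepted but never read:
//
//   int decode_stub(std::uint32_t encoder_version) { return 0; }
//
// Marking the parameter as intentionally unused keeps the signature stable
// while satisfying the warnings-as-errors build:
int decode_stub([[maybe_unused]] std::uint32_t encoder_version) {
    return 0;
}
```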
5 changes: 1 addition & 4 deletions cpp/arcticdb/codec/codec-inl.hpp
@@ -18,10 +18,6 @@

#include <arcticdb/util/bitset.hpp>

#include <arcticdb/util/buffer.hpp>

#include <google/protobuf/text_format.h>

#include <type_traits>

namespace arcticdb {
@@ -135,6 +131,7 @@ std::size_t decode_ndarray(
}

if(field.sparse_map_bytes()) {
util::check(!is_empty_type(type_desc_tag.data_type()), "Empty typed columns should not have sparse map");
util::check_magic<util::BitMagicStart>(data_in);
const auto bitmap_size = field.sparse_map_bytes() - util::combined_bit_magic_delimiters_size();
bv = util::deserialize_bytes_to_bitmap(data_in, bitmap_size);
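The check added to decode_ndarray states an invariant the PR enforces on both the encode and decode paths: an empty-typed column stores no values, so its encoded field must not carry a sparse map. A standalone sketch of that rule, using hypothetical names rather than ArcticDB's real types:

```cpp
// Standalone sketch of the invariant (hypothetical names, not ArcticDB's API):
// only non-empty types may carry a serialized bitmap of present rows.
#include <cstddef>
#include <stdexcept>

enum class DataType { EMPTY, INT64, FLOAT64 /* ... */ };

inline bool is_empty_type(DataType dt) { return dt == DataType::EMPTY; }

inline void check_no_sparse_map_for_empty(DataType dt, std::size_t sparse_map_bytes) {
    if (sparse_map_bytes != 0 && is_empty_type(dt))
        throw std::runtime_error("Empty typed columns should not have a sparse map");
}
```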
55 changes: 35 additions & 20 deletions cpp/arcticdb/codec/codec.cpp
@@ -310,11 +310,12 @@ void decode_string_pool( const arcticdb::proto::encoding::SegmentHeader& hdr,
}
}

void decode_v2(const Segment& segment,
arcticdb::proto::encoding::SegmentHeader& hdr,
SegmentInMemory& res,
const StreamDescriptor& desc)
{
void decode_v2(
const Segment& segment,
arcticdb::proto::encoding::SegmentHeader& hdr,
SegmentInMemory& res,
const StreamDescriptor& desc
) {
ARCTICDB_SAMPLE(DecodeSegment, 0)
const auto [begin, end] = get_segment_begin_end(segment, hdr);
auto encoded_fields_ptr = end;
@@ -364,11 +365,12 @@ void decode_v2(const Segment& segment,
res.set_compacted(segment.header().compacted());
}}

void decode_v1(const Segment& segment,
const arcticdb::proto::encoding::SegmentHeader& hdr,
SegmentInMemory& res,
StreamDescriptor::Proto& desc)
{
void decode_v1(
const Segment& segment,
const arcticdb::proto::encoding::SegmentHeader& hdr,
SegmentInMemory& res,
StreamDescriptor::Proto& desc
) {
ARCTICDB_SAMPLE(DecodeSegment, 0)
const uint8_t* data = segment.buffer().data();
util::check(data != nullptr, "Got null data ptr from segment");
@@ -384,17 +386,29 @@ void decode_v1(const Segment& segment,
const auto seg_row_count = fields_size ? ssize_t(hdr.fields(0).ndarray().items_count()) : 0LL;
res.init_column_map();

for (std::size_t i = 0; i < static_cast<size_t>(fields_size); ++i) {
const auto& field = hdr.fields(static_cast<int>(i));
const auto& field_name = desc.fields(static_cast<int>(i)).name();
util::check(data!=end, "Reached end of input block with {} fields to decode", fields_size-i);
if(auto col_index = res.column_index(field_name)) {
for (int i = 0; i < fields_size; ++i) {
const auto& field = hdr.fields(i);
const auto& field_name = desc.fields(i).name();
if (auto col_index = res.column_index(field_name)) {
auto& col = res.column(static_cast<position_t>(*col_index));
data += decode_field(res.field(*col_index).type(), field, data, col, col.opt_sparse_map(), to_encoding_version(hdr.encoding_version()));
} else
util::check(
data != end || is_empty_type(col.type().data_type()),
"Reached end of input block with {} fields to decode",
fields_size - i
);
data += decode_field(
res.field(*col_index).type(),
field,
data,
col,
col.opt_sparse_map(),
to_encoding_version(hdr.encoding_version())
);
} else {
util::check(data != end, "Reached end of input block with {} fields to decode", fields_size - i);
data += encoding_sizes::field_compressed_size(field);

ARCTICDB_TRACE(log::codec(), "Decoded column {} to position {}", i, data-begin);
}
ARCTICDB_TRACE(log::codec(), "Decoded column {} to position {}", i, data - begin);
}

decode_string_pool(hdr, data, begin, end, res);
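
The reworked loop in decode_v1 changes when running out of encoded bytes counts as an error: an empty-typed column contributes no data blocks, so reaching the end of the input block is legal for it but still fatal for any other column. A simplified sketch of that control flow, with stand-in types in place of the real header and column objects:

```cpp
// Simplified sketch of the decode loop's rule (stand-in types, not the real
// SegmentHeader/Column classes): end-of-input is only acceptable while the
// remaining fields are empty-typed.
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <vector>

struct FieldStub {
    bool is_empty_type;            // empty-typed fields write no data blocks
    std::size_t compressed_bytes;  // 0 for empty-typed fields
};

inline const std::uint8_t* decode_fields(
    const std::vector<FieldStub>& fields,
    const std::uint8_t* data,
    const std::uint8_t* end
) {
    for (const auto& field : fields) {
        if (data == end && !field.is_empty_type)
            throw std::runtime_error("Reached end of input block with fields left to decode");
        data += field.compressed_bytes;  // no-op for empty-typed fields
    }
    return data;
}
```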
@@ -500,7 +514,8 @@ void encode_sparse_map(
Buffer& out,
std::ptrdiff_t& pos
) {
if (column_data.bit_vector() != nullptr && column_data.bit_vector()->count() > 0) {
if (column_data.bit_vector() != nullptr && column_data.bit_vector()->count() > 0) {
util::check(!is_empty_type(column_data.type().data_type()), "Empty typed columns should not have sparse maps");
ARCTICDB_DEBUG(log::codec(), "Sparse map count = {} pos = {}", column_data.bit_vector()->count(), pos);
const size_t sparse_bm_bytes = encode_bitmap(*column_data.bit_vector(), out, pos);
util::variant_match(variant_field, [&](auto field) {
35 changes: 19 additions & 16 deletions cpp/arcticdb/codec/zstd.hpp
@@ -51,28 +51,31 @@ struct ZstdBlockEncoder {

struct ZstdDecoder {

/*
* encoder_version is here to support multiple versions but won't be used before we have them
*/
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"

template<typename T>
/// @param[in] encoder_version Used to support multiple versions but won't be used before we have them
template <typename T>
static void decode_block(
std::uint32_t encoder_version,
const std::uint8_t *in,
[[maybe_unused]] std::uint32_t encoder_version,
const std::uint8_t* in,
std::size_t in_bytes,
T *t_out,
std::size_t out_bytes) {
T* t_out,
std::size_t out_bytes
) {

const std::size_t decomp_size = ZSTD_getFrameContentSize(in, in_bytes);
codec::check<ErrorCode::E_DECODE_ERROR>(decomp_size == out_bytes, "expected out_bytes == zstd deduced bytes, actual {} != {}",
out_bytes, decomp_size);
codec::check<ErrorCode::E_DECODE_ERROR>(
decomp_size == out_bytes,
"expected out_bytes == zstd deduced bytes, actual {} != {}",
out_bytes,
decomp_size
);
std::size_t real_decomp = ZSTD_decompress(t_out, out_bytes, in, in_bytes);
codec::check<ErrorCode::E_DECODE_ERROR>(real_decomp == out_bytes, "expected out_bytes == zstd decompressed bytes, actual {} != {}",
out_bytes, real_decomp);
codec::check<ErrorCode::E_DECODE_ERROR>(
real_decomp == out_bytes,
"expected out_bytes == zstd decompressed bytes, actual {} != {}",
out_bytes,
real_decomp
);
}
#pragma GCC diagnostic pop
};

} // namespace arcticdb::detail
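
Beyond silencing the unused-parameter warning, the reformatted decode_block keeps its two-step size validation: the content size declared in the zstd frame header and the byte count actually produced by ZSTD_decompress must both equal the caller's expected output size. A self-contained sketch of the same pattern against the plain zstd C API, with error handling simplified to exceptions rather than ArcticDB's codec::check:

```cpp
// Self-contained sketch of the same validation pattern using the zstd C API
// directly; ArcticDB's real error handling goes through codec::check instead.
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <zstd.h>

inline void zstd_decode_exact(
    const std::uint8_t* in, std::size_t in_bytes,
    void* out, std::size_t out_bytes
) {
    // Step 1: the frame header must declare exactly the expected size.
    const unsigned long long declared = ZSTD_getFrameContentSize(in, in_bytes);
    if (declared != out_bytes)
        throw std::runtime_error("zstd frame content size does not match expected out_bytes");

    // Step 2: decompression must actually produce that many bytes.
    const std::size_t produced = ZSTD_decompress(out, out_bytes, in, in_bytes);
    if (ZSTD_isError(produced) || produced != out_bytes)
        throw std::runtime_error("zstd decompressed size does not match expected out_bytes");
}
```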