From e8e5faeab39d35bae3745e036c9d1d75143b2bbf Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Wed, 10 Jan 2024 13:05:14 +0200 Subject: [PATCH 01/30] Add src and dest type descriptors to decode or expand. Add backfilling. --- cpp/arcticdb/entity/types.hpp | 15 +++- cpp/arcticdb/pipeline/read_frame.cpp | 78 +++++++++++++++---- cpp/arcticdb/python/python_handlers.cpp | 46 +++++++---- cpp/arcticdb/python/python_handlers.hpp | 12 ++- cpp/arcticdb/util/type_handler.hpp | 18 ++++- .../version_store/test_empty_column_type.py | 26 +++++-- 6 files changed, 150 insertions(+), 45 deletions(-) diff --git a/cpp/arcticdb/entity/types.hpp b/cpp/arcticdb/entity/types.hpp index 876bd69947..1e4b98705a 100644 --- a/cpp/arcticdb/entity/types.hpp +++ b/cpp/arcticdb/entity/types.hpp @@ -508,6 +508,10 @@ struct TypeDescriptor { data_type_ = combine_data_type(slice_value_type(data_type_), new_size_bits); } + [[nodiscard]] SizeBits get_size_bits() const { + return slice_bit_size(data_type_); + } + [[nodiscard]] constexpr int get_type_byte_size() const { return get_byte_count(slice_bit_size(data_type_)); } @@ -529,11 +533,16 @@ constexpr bool is_numpy_array(TypeDescriptor td) { } constexpr bool is_pyobject_type(TypeDescriptor td) { - return is_dynamic_string_type(slice_value_type(td.data_type())) || - slice_value_type(td.data_type()) == ValueType::PYBOOL || - is_numpy_array(td); + return is_dynamic_string_type(slice_value_type(td.data_type())) || is_py_bool_type(td.data_type()) || + is_numpy_array(td); } +constexpr inline bool is_nullable_type(TypeDescriptor td) { + return td.dimension() > Dimension::Dim0 || + is_empty_type(td.data_type()) || + is_sequence_type(td.data_type()) || + is_py_bool_type(td.data_type()); +} inline void set_data_type(DataType data_type, TypeDescriptor& type_desc) { type_desc.data_type_ = data_type; diff --git a/cpp/arcticdb/pipeline/read_frame.cpp b/cpp/arcticdb/pipeline/read_frame.cpp index 7077941d0c..5432de6215 100644 --- a/cpp/arcticdb/pipeline/read_frame.cpp +++ b/cpp/arcticdb/pipeline/read_frame.cpp @@ -225,12 +225,22 @@ void decode_or_expand_impl( const uint8_t*& data, uint8_t* dest, const EncodedFieldType& encoded_field_info, - const TypeDescriptor& type_descriptor, + const TypeDescriptor& source_type_descriptor, + const TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr buffers, EncodingVersion encding_version) { - if(auto handler = TypeHandlerRegistry::instance()->get_handler(type_descriptor); handler) { - handler->handle_type(data, dest, VariantField{&encoded_field_info}, type_descriptor, dest_bytes, std::move(buffers), encding_version); + if(auto handler = TypeHandlerRegistry::instance()->get_handler(source_type_descriptor); handler) { + handler->handle_type( + data, + dest, + VariantField{&encoded_field_info}, + source_type_descriptor, + destination_type_descriptor, + dest_bytes, + std::move(buffers), + encding_version + ); } else { std::optional bv; if (encoded_field_info.has_ndarray() && encoded_field_info.ndarray().sparse_map_bytes() > 0) { @@ -238,8 +248,8 @@ void decode_or_expand_impl( const auto bytes = encoding_sizes::data_uncompressed_size(ndarray); ChunkedBuffer sparse{bytes}; SliceDataSink sparse_sink{sparse.data(), bytes}; - data += decode_field(type_descriptor, encoded_field_info, data, sparse_sink, bv, encding_version); - type_descriptor.visit_tag([dest, dest_bytes, &bv, &sparse](const auto tdt) { + data += decode_field(source_type_descriptor, encoded_field_info, data, sparse_sink, bv, encding_version); + 
source_type_descriptor.visit_tag([dest, dest_bytes, &bv, &sparse](const auto tdt) { using TagType = decltype(tdt); using RawType = typename TagType::DataTypeTag::raw_type; util::default_initialize(dest, dest_bytes); @@ -249,12 +259,12 @@ void decode_or_expand_impl( SliceDataSink sink(dest, dest_bytes); const auto &ndarray = encoded_field_info.ndarray(); if (const auto bytes = encoding_sizes::data_uncompressed_size(ndarray); bytes < dest_bytes) { - type_descriptor.visit_tag([dest, bytes, dest_bytes](const auto tdt) { + source_type_descriptor.visit_tag([dest, bytes, dest_bytes](const auto tdt) { using TagType = decltype(tdt); util::default_initialize(dest + bytes, dest_bytes - bytes); }); } - data += decode_field(type_descriptor, encoded_field_info, data, sink, bv, encding_version); + data += decode_field(source_type_descriptor, encoded_field_info, data, sink, bv, encding_version); } } } @@ -279,13 +289,23 @@ void decode_or_expand( const uint8_t*& data, uint8_t* dest, const VariantField& variant_field, - const TypeDescriptor& type_descriptor, + const TypeDescriptor& source_type_descriptor, + const TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr buffers, EncodingVersion encoding_version ) { util::variant_match(variant_field, [&](auto field) { - decode_or_expand_impl(data, dest, *field, type_descriptor, dest_bytes, buffers, encoding_version); + decode_or_expand_impl( + data, + dest, + *field, + source_type_descriptor, + destination_type_descriptor, + dest_bytes, + buffers, + encoding_version + ); }); } @@ -369,7 +389,9 @@ void decode_into_frame_static( auto field_name = context.descriptor().fields(it.source_field_pos()).name(); auto& buffer = frame.column(static_cast(it.dest_col())).data().buffer(); ColumnMapping m{frame, it.dest_col(), it.source_field_pos(), context}; - util::check(trivially_compatible_types(m.source_type_desc_, m.dest_type_desc_), "Column type conversion from {} to {} not implemented in column {}:{} -> {}:{}", + const bool types_trivially_compatible = trivially_compatible_types(m.source_type_desc_, m.dest_type_desc_); + const bool any_type_is_empty = is_empty_type(m.source_type_desc_.data_type()) || is_empty_type(m.dest_type_desc_.data_type()); + util::check(types_trivially_compatible || any_type_is_empty, "Column type conversion from {} to {} not implemented in column {}:{} -> {}:{}", m.source_type_desc_, m.dest_type_desc_, it.source_col(), @@ -377,7 +399,16 @@ void decode_into_frame_static( it.dest_col(), m.frame_field_descriptor_.name()); util::check(data != end || remaining_fields_empty(it, context), "Reached end of input block with {} fields to decode", it.remaining_fields()); - decode_or_expand(data, buffer.data() + m.offset_bytes_, encoded_field, m.source_type_desc_, m.dest_bytes_, buffers, encoding_version); + decode_or_expand( + data, + buffer.data() + m.offset_bytes_, + encoded_field, + m.source_type_desc_, + m.dest_type_desc_, + m.dest_bytes_, + buffers, + encoding_version + ); ARCTICDB_TRACE(log::codec(), "Decoded column {} to position {}", field_name, data - begin); it.advance(); @@ -449,14 +480,22 @@ void decode_into_frame_dynamic( if constexpr(std::is_arithmetic_v && std::is_arithmetic_v) { const auto src_bytes = sizeof_datatype(m.source_type_desc_) * m.num_rows_; Buffer tmp_buf{src_bytes}; - decode_or_expand(data, tmp_buf.data(), encoded_field, m.source_type_desc_, src_bytes, buffers, encdoing_version); + decode_or_expand( + data, + tmp_buf.data(), + encoded_field, + m.source_type_desc_, + m.dest_type_desc_, + src_bytes, + 
buffers, + encdoing_version + ); auto src_ptr = reinterpret_cast(tmp_buf.data()); auto dest_ptr = reinterpret_cast(buffer.data() + m.offset_bytes_); for (auto i = 0u; i < m.num_rows_; ++i) { *dest_ptr++ = static_cast(*src_ptr++); } - } - else { + } else { util::raise_rte("Can't promote type {} to type {} in field {}", m.source_type_desc_, m.dest_type_desc_, m.frame_field_descriptor_.name()); } }); @@ -469,7 +508,16 @@ void decode_into_frame_dynamic( "Reached end of input block with {} fields to decode", field_count - field_col); - decode_or_expand(data, buffer.data() + m.offset_bytes_, encoded_field, m.source_type_desc_, m.dest_bytes_, buffers, encdoing_version); + decode_or_expand( + data, + buffer.data() + m.offset_bytes_, + encoded_field, + m.source_type_desc_, + m.dest_type_desc_, + m.dest_bytes_, + buffers, + encdoing_version + ); } ARCTICDB_TRACE(log::codec(), "Decoded column {} to position {}", frame.field(dst_col).name(), data - begin); } diff --git a/cpp/arcticdb/python/python_handlers.cpp b/cpp/arcticdb/python/python_handlers.cpp index cadf3e58ef..c0e3abbbd0 100644 --- a/cpp/arcticdb/python/python_handlers.cpp +++ b/cpp/arcticdb/python/python_handlers.cpp @@ -26,8 +26,8 @@ namespace arcticdb { } /// @important This calls pybind's initialize array function which is NOT thread safe. Moreover, numpy arrays can - /// be created only by the thread holindg the GIL. In practice we can get away with allocating arrays only from - /// a sigle thread (enve if it's not the one holding the GIL). This, however, is not guranteed to work. + /// be created only by the thread holding the GIL. In practice we can get away with allocating arrays only from + /// a single thread (even if it's not the one holding the GIL). This, however, is not guaranteed to work. 
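
The constraint described above can be made concrete with a short sketch (an editorial example, not part of the patch; `descr`, `shape` and `strides` stand in for the values initialize_array works with, and gil_scoped_acquire is pybind11's RAII guard for the GIL):

    pybind11::gil_scoped_acquire acquire;        // take the GIL before touching Python state
    pybind11::array arr{descr, shape, strides};  // safe: the array is created while the GIL is held
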
/// @todo Allocate numpy arrays only from the thread holding the GIL [[nodiscard]] static inline PyObject* initialize_array( const pybind11::dtype& descr, @@ -57,7 +57,8 @@ namespace arcticdb { const uint8_t*& input, uint8_t* dest, const VariantField& variant_field, - const entity::TypeDescriptor&, + const entity::TypeDescriptor& source_type_descriptor, + const entity::TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr, EncodingVersion @@ -66,8 +67,25 @@ namespace arcticdb { util::check(dest != nullptr, "Got null destination pointer"); const size_t num_rows = dest_bytes / get_type_size(DataType::EMPTYVAL); static_assert(get_type_size(DataType::EMPTYVAL) == sizeof(PyObject *)); - auto ptr_dest = reinterpret_cast(dest); - fill_with_none(ptr_dest, num_rows); + + const SizeBits destination_size_bits = destination_type_descriptor.get_size_bits(); + destination_type_descriptor.visit_tag([&](auto tag) { + if constexpr (is_nullable_type(TypeDescriptor(tag))) { + auto ptr_dest = reinterpret_cast(dest); + fill_with_none(ptr_dest, num_rows); + } else if constexpr (is_floating_point_type(tag.data_type())) { + using RawType = typename decltype(tag)::DataTypeTag::raw_type; + RawType* ptr_dest = reinterpret_cast(dest); + std::generate(ptr_dest, ptr_dest + num_rows, &std::numeric_limits::quiet_NaN); + dest += num_rows * sizeof(RawType); + } else if constexpr (is_integer_type(tag.data_type()) || is_bool_type(tag.data_type()) || is_time_type(tag.data_type())) { + using RawType = typename decltype(tag)::DataTypeTag::raw_type; + std::memset(dest, 0, sizeof(RawType) * num_rows); + dest += num_rows * sizeof(RawType); + } else { + static_assert(sizeof(tag) == 0, "Unhandled data_type"); + } + }); util::variant_match(variant_field, [&input](const auto &field) { using EncodedFieldType = std::decay_t; @@ -92,7 +110,8 @@ namespace arcticdb { const uint8_t*& data, uint8_t* dest, const VariantField& encoded_field_info, - const entity::TypeDescriptor& type_descriptor, + const entity::TypeDescriptor& source_type_descriptor, + const entity::TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr, EncodingVersion encding_version @@ -116,7 +135,7 @@ namespace arcticdb { const auto bytes = encoding_sizes::data_uncompressed_size(ndarray); auto sparse = ChunkedBuffer::presized(bytes); SliceDataSink sparse_sink{sparse.data(), bytes}; - data += decode_field(type_descriptor, *field, data, sparse_sink, bv, encding_version); + data += decode_field(source_type_descriptor, *field, data, sparse_sink, bv, encding_version); const auto num_bools = bv->count(); auto ptr_src = sparse.template ptr_cast(0, num_bools * sizeof(uint8_t)); auto last_row = 0u; @@ -142,7 +161,8 @@ namespace arcticdb { const uint8_t*& data, uint8_t* dest, const VariantField& encoded_field_info, - const entity::TypeDescriptor& type_descriptor, + const entity::TypeDescriptor& source_type_descriptor, + const entity::TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr buffers, EncodingVersion encoding_version @@ -158,17 +178,17 @@ namespace arcticdb { fill_with_none(ptr_dest, row_count); return; } - std::shared_ptr column = buffers->get_buffer(type_descriptor, true); + std::shared_ptr column = buffers->get_buffer(source_type_descriptor, true); column->check_magic(); log::version().info("Column got buffer at {}", uintptr_t(column.get())); auto bv = std::make_optional(util::BitSet{}); - data += decode_field(type_descriptor, *field, data, *column, bv, encoding_version); + data += 
decode_field(source_type_descriptor, *field, data, *column, bv, encoding_version); auto last_row = 0u; ARCTICDB_SUBSAMPLE(InitArrayAcquireGIL, 0) - const auto strides = static_cast(get_type_size(type_descriptor.data_type())); - const py::dtype py_dtype = generate_python_dtype(type_descriptor, strides); - type_descriptor.visit_tag([&] (auto tdt) { + const auto strides = static_cast(get_type_size(source_type_descriptor.data_type())); + const py::dtype py_dtype = generate_python_dtype(source_type_descriptor, strides); + source_type_descriptor.visit_tag([&] (auto tdt) { const auto& blocks = column->blocks(); if(blocks.empty()) return; diff --git a/cpp/arcticdb/python/python_handlers.hpp b/cpp/arcticdb/python/python_handlers.hpp index f6f750791b..4c501bee5c 100644 --- a/cpp/arcticdb/python/python_handlers.hpp +++ b/cpp/arcticdb/python/python_handlers.hpp @@ -17,7 +17,8 @@ namespace arcticdb { const uint8_t*& data, uint8_t* dest, const VariantField& encoded_field, - const entity::TypeDescriptor& type_descriptor, + const entity::TypeDescriptor& source_type_descriptor, + const entity::TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr buffers, EncodingVersion encding_version); @@ -29,7 +30,8 @@ namespace arcticdb { const uint8_t *&data, uint8_t *dest, const VariantField &encoded_field, - const entity::TypeDescriptor &type_descriptor, + const entity::TypeDescriptor& source_type_descriptor, + const entity::TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr buffers, EncodingVersion encding_version); @@ -40,7 +42,8 @@ namespace arcticdb { const uint8_t*& data, uint8_t* dest, const VariantField& encoded_field, - const entity::TypeDescriptor& type_descriptor, + const entity::TypeDescriptor& source_type_descriptor, + const entity::TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr buffers ); @@ -55,7 +58,8 @@ namespace arcticdb { const uint8_t*& data, uint8_t* dest, const VariantField& encoded_field, - const entity::TypeDescriptor& type_descriptor, + const entity::TypeDescriptor& source_type_descriptor, + const entity::TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr buffers, EncodingVersion encding_version); diff --git a/cpp/arcticdb/util/type_handler.hpp b/cpp/arcticdb/util/type_handler.hpp index a696b0c9ec..aa21f0e14f 100644 --- a/cpp/arcticdb/util/type_handler.hpp +++ b/cpp/arcticdb/util/type_handler.hpp @@ -32,11 +32,23 @@ struct ITypeHandler { const uint8_t*& source, uint8_t* dest, const VariantField& encoded_field_info, - const entity::TypeDescriptor& type_descriptor, + const entity::TypeDescriptor& source_type_descriptor, + const entity::TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr buffers, - EncodingVersion encoding_version) { - folly::poly_call<0>(*this, source, dest, encoded_field_info, type_descriptor, dest_bytes, buffers, encoding_version); + EncodingVersion encoding_version + ) { + folly::poly_call<0>( + *this, + source, + dest, + encoded_field_info, + source_type_descriptor, + destination_type_descriptor, + dest_bytes, + buffers, + encoding_version + ); } }; diff --git a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py index 1dc64f9881..7f706da764 100644 --- a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py +++ b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py @@ -10,21 +10,32 @@ import numpy as np import pytest 
-@pytest.fixture(params=(int, float, str))
-def element_type_constructor(request):
-    yield request.param
+@pytest.fixture
+def int_dtype():
+    bit_sizes = ["8", "16", "32", "64"]
+    base_numeric_types = ["int", "uint"]
+    for t in base_numeric_types:
+        for s in bit_sizes:
+            yield t + s
 
 
-@pytest.mark.xfail(reason="Not implemented")
+@pytest.fixture
+def float_dtype():
+    bit_sizes = ["32", "64"]
+    for s in bit_sizes:
+        yield "float" + s
+
 class TestEmptyColumnTypeAppend:
-    def test_can_append_non_empty_to_empty(self, lmdb_version_store, element_type_constructor):
+    def test_can_append_non_empty_to_empty(self, lmdb_version_store_v2, int_dtype):
+        lmdb_version_store = lmdb_version_store_v2
         df_empty = pd.DataFrame({"col1": [None, None]})
         lmdb_version_store.write("test_can_append_to_empty_column", df_empty)
-        df_non_empty = pd.DataFrame({"col1": map(element_type_constructor, [1, 2, 3])})
+        df_non_empty = pd.DataFrame({"col1": np.array([1,2,3], dtype=int_dtype)})
         lmdb_version_store.append("test_can_append_to_empty_column", df_non_empty)
-        expected_result = pd.concat([df_empty, df_non_empty], ignore_index=True)
+        expected_result = pd.DataFrame({"col1": np.array([0,0,1,2,3], dtype=int_dtype)})
         assert_frame_equal(lmdb_version_store.read("test_can_append_to_empty_column").data, expected_result)
 
+    @pytest.mark.xfail(reason="Not implemented")
     def test_can_append_empty_to_non_empty(self, lmdb_version_store, element_type_constructor):
         df_empty = pd.DataFrame({"col1": [None, None]})
         df_non_empty = pd.DataFrame({"col1": map(element_type_constructor, [1, 2, 3])})
@@ -33,6 +44,7 @@ def test_can_append_empty_to_non_empty(self, lmdb_version_store, element_type_co
         expected_result = pd.concat([df_non_empty, df_empty], ignore_index=True)
         assert_frame_equal(lmdb_version_store.read("test_can_append_to_empty_column").data, expected_result)
 
+    @pytest.mark.xfail(reason="Not implemented")
     def test_can_append_empty_to_empty(self, lmdb_version_store, element_type_constructor):
         df_empty_1 = pd.DataFrame({"col1": [None, None]})
         df_empty_2 = pd.DataFrame({"col1": []})

From 0ab4c22896842c8806cfcd652b31b279f8d88093 Mon Sep 17 00:00:00 2001
From: Vasil Pashov
Date: Thu, 11 Jan 2024 14:23:50 +0200
Subject: [PATCH 02/30] Fix string backfills in empty type.
Add test cases for all types to check if they can be appened to null --- cpp/arcticdb/entity/types.hpp | 7 -- cpp/arcticdb/pipeline/read_frame.cpp | 81 ++++++++--------- cpp/arcticdb/python/python_handlers.cpp | 42 +++++---- cpp/arcticdb/util/sparse_utils.hpp | 3 + python/test.py | 7 ++ .../version_store/test_empty_column_type.py | 91 +++++++++++-------- 6 files changed, 130 insertions(+), 101 deletions(-) create mode 100644 python/test.py diff --git a/cpp/arcticdb/entity/types.hpp b/cpp/arcticdb/entity/types.hpp index 1e4b98705a..771a03d083 100644 --- a/cpp/arcticdb/entity/types.hpp +++ b/cpp/arcticdb/entity/types.hpp @@ -537,13 +537,6 @@ constexpr bool is_pyobject_type(TypeDescriptor td) { is_numpy_array(td); } -constexpr inline bool is_nullable_type(TypeDescriptor td) { - return td.dimension() > Dimension::Dim0 || - is_empty_type(td.data_type()) || - is_sequence_type(td.data_type()) || - is_py_bool_type(td.data_type()); -} - inline void set_data_type(DataType data_type, TypeDescriptor& type_desc) { type_desc.data_type_ = data_type; } diff --git a/cpp/arcticdb/pipeline/read_frame.cpp b/cpp/arcticdb/pipeline/read_frame.cpp index 5432de6215..e8f2e90061 100644 --- a/cpp/arcticdb/pipeline/read_frame.cpp +++ b/cpp/arcticdb/pipeline/read_frame.cpp @@ -296,16 +296,16 @@ void decode_or_expand( EncodingVersion encoding_version ) { util::variant_match(variant_field, [&](auto field) { - decode_or_expand_impl( - data, - dest, - *field, - source_type_descriptor, - destination_type_descriptor, - dest_bytes, - buffers, - encoding_version - ); + decode_or_expand_impl( + data, + dest, + *field, + source_type_descriptor, + destination_type_descriptor, + dest_bytes, + buffers, + encoding_version + ); }); } @@ -399,16 +399,16 @@ void decode_into_frame_static( it.dest_col(), m.frame_field_descriptor_.name()); util::check(data != end || remaining_fields_empty(it, context), "Reached end of input block with {} fields to decode", it.remaining_fields()); - decode_or_expand( - data, - buffer.data() + m.offset_bytes_, - encoded_field, - m.source_type_desc_, - m.dest_type_desc_, - m.dest_bytes_, - buffers, - encoding_version - ); + decode_or_expand( + data, + buffer.data() + m.offset_bytes_, + encoded_field, + m.source_type_desc_, + m.dest_type_desc_, + m.dest_bytes_, + buffers, + encoding_version + ); ARCTICDB_TRACE(log::codec(), "Decoded column {} to position {}", field_name, data - begin); it.advance(); @@ -477,19 +477,19 @@ void decode_into_frame_dynamic( using DestinationType = typename decltype(dest_desc_tag)::DataTypeTag::raw_type; m.source_type_desc_.visit_tag([&buffer, &m, &data, &encoded_field, &buffers, encdoing_version] (auto src_desc_tag ) { using SourceType = typename decltype(src_desc_tag)::DataTypeTag::raw_type; - if constexpr(std::is_arithmetic_v && std::is_arithmetic_v) { + if constexpr (std::is_arithmetic_v && std::is_arithmetic_v) { const auto src_bytes = sizeof_datatype(m.source_type_desc_) * m.num_rows_; Buffer tmp_buf{src_bytes}; - decode_or_expand( - data, - tmp_buf.data(), - encoded_field, - m.source_type_desc_, - m.dest_type_desc_, - src_bytes, - buffers, - encdoing_version - ); + decode_or_expand( + data, + tmp_buf.data(), + encoded_field, + m.source_type_desc_, + m.dest_type_desc_, + src_bytes, + buffers, + encdoing_version + ); auto src_ptr = reinterpret_cast(tmp_buf.data()); auto dest_ptr = reinterpret_cast(buffer.data() + m.offset_bytes_); for (auto i = 0u; i < m.num_rows_; ++i) { @@ -507,17 +507,16 @@ void decode_into_frame_dynamic( util::check(data != end, "Reached end of input block 
with {} fields to decode",
            field_count - field_col);
-        decode_or_expand(
-                data,
-                buffer.data() + m.offset_bytes_,
-                encoded_field,
-                m.source_type_desc_,
-                m.dest_type_desc_,
-                m.dest_bytes_,
-                buffers,
-                encdoing_version
-        );
+        decode_or_expand(
+            data,
+            buffer.data() + m.offset_bytes_,
+            encoded_field,
+            m.source_type_desc_,
+            m.dest_type_desc_,
+            m.dest_bytes_,
+            buffers,
+            encdoing_version
+        );
     }
     ARCTICDB_TRACE(log::codec(), "Decoded column {} to position {}", frame.field(dst_col).name(), data - begin);
 }
diff --git a/cpp/arcticdb/python/python_handlers.cpp b/cpp/arcticdb/python/python_handlers.cpp
index c0e3abbbd0..b549083444 100644
--- a/cpp/arcticdb/python/python_handlers.cpp
+++ b/cpp/arcticdb/python/python_handlers.cpp
@@ -53,6 +53,10 @@ namespace arcticdb {
         return dest + count;
     }
 
+    constexpr inline bool is_nullable_type(TypeDescriptor td) {
+        return td.dimension() > Dimension::Dim0 || is_empty_type(td.data_type()) || is_py_bool_type(td.data_type());
+    }
+
     void EmptyHandler::handle_type(
         const uint8_t*& input,
         uint8_t* dest,
@@ -65,39 +69,45 @@ namespace arcticdb {
     ) {
         ARCTICDB_SAMPLE(HandleEmpty, 0)
         util::check(dest != nullptr, "Got null destination pointer");
-        const size_t num_rows = dest_bytes / get_type_size(DataType::EMPTYVAL);
-        static_assert(get_type_size(DataType::EMPTYVAL) == sizeof(PyObject *));
+        const size_t num_rows = dest_bytes / destination_type_descriptor.get_type_byte_size();
+        static_assert(get_type_size(DataType::EMPTYVAL) == sizeof(PyObject*));
 
-        const SizeBits destination_size_bits = destination_type_descriptor.get_size_bits();
+        // TODO: arcticdb::util::default_initialize does mostly the same thing; the main
+        // difference is that it does not write py::none directly.
+        // * Should we use it here instead?
+        // * It doesn't handle nullable bools and arrays. How should they be handled?
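
As a condensed reference for the dispatch that follows, the backfill value written for each destination type is (this only summarises the code below, it adds no new behaviour):

    // string (sequence) columns      -> not_a_string() offsets, turned into Python strings later
    // nullable (empty/array/py-bool) -> Py_None via fill_with_none()
    // floating point                 -> std::numeric_limits<RawType>::quiet_NaN()
    // integer and plain bool         -> zero-filled bytes
    // time (datetime64)              -> NaT
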
         destination_type_descriptor.visit_tag([&](auto tag) {
-            if constexpr (is_nullable_type(TypeDescriptor(tag))) {
-                auto ptr_dest = reinterpret_cast<PyObject**>(dest);
-                fill_with_none(ptr_dest, num_rows);
+            using RawType = typename decltype(tag)::DataTypeTag::raw_type;
+            RawType* ptr_dest = reinterpret_cast<RawType*>(dest);
+            if constexpr (is_sequence_type(tag.data_type())) {
+                // not_a_string() is written here because strings are stored as offsets into
+                // the string pool; later, in fix_and_reduce(), the offsets are replaced by
+                // real Python strings.
+                // TODO: It would be best if the type handler sets this to py::none
+                std::fill_n(ptr_dest, num_rows, arcticdb::not_a_string());
+            } else if constexpr (is_nullable_type(TypeDescriptor(tag))) {
+                fill_with_none(reinterpret_cast<PyObject**>(dest), num_rows);
             } else if constexpr (is_floating_point_type(tag.data_type())) {
-                using RawType = typename decltype(tag)::DataTypeTag::raw_type;
-                RawType* ptr_dest = reinterpret_cast<RawType*>(dest);
-                std::generate(ptr_dest, ptr_dest + num_rows, &std::numeric_limits<RawType>::quiet_NaN);
-                dest += num_rows * sizeof(RawType);
-            } else if constexpr (is_integer_type(tag.data_type()) || is_bool_type(tag.data_type()) || is_time_type(tag.data_type())) {
-                using RawType = typename decltype(tag)::DataTypeTag::raw_type;
+                std::fill_n(ptr_dest, num_rows, std::numeric_limits<RawType>::quiet_NaN());
+            } else if constexpr (is_integer_type(tag.data_type()) || is_bool_type(tag.data_type())) {
                 std::memset(dest, 0, sizeof(RawType) * num_rows);
-                dest += num_rows * sizeof(RawType);
+            } else if constexpr (is_time_type(tag.data_type())) {
+                std::fill_n(ptr_dest, num_rows, arcticdb::util::NaT);
             } else {
                 static_assert(sizeof(tag) == 0, "Unhandled data_type");
             }
         });
 
-        util::variant_match(variant_field, [&input](const auto &field) {
+        util::variant_match(variant_field, [&input](const auto& field) {
             using EncodedFieldType = std::decay_t;
             if constexpr (std::is_same_v)
                 util::check_magic(input);
 
             if (field->encoding_case() == EncodedFieldType::kNdarray) {
-                const auto &ndarray_field = field->ndarray();
+                const auto& ndarray_field = field->ndarray();
                 const auto num_blocks = ndarray_field.values_size();
                 util::check(num_blocks <= 1, "Unexpected number of empty type blocks: {}", num_blocks);
                 for (auto block_num = 0; block_num < num_blocks; ++block_num) {
-                    const auto &block_info = ndarray_field.values(block_num);
+                    const auto& block_info = ndarray_field.values(block_num);
                     input += block_info.out_bytes();
                 }
             } else {
diff --git a/cpp/arcticdb/util/sparse_utils.hpp b/cpp/arcticdb/util/sparse_utils.hpp
index e24c6cad59..6eb892c1b3 100644
--- a/cpp/arcticdb/util/sparse_utils.hpp
+++ b/cpp/arcticdb/util/sparse_utils.hpp
@@ -68,6 +68,9 @@ inline void expand_dense_buffer_using_bitmap(const util::BitMagic &bv, const uin
     }
 }
 
+// TODO: EmptyHandler::handle_type has a similar procedure.
+// * Should this be used instead?
+// * What about nullable booleans and arrays?
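
A usage sketch for the default_initialize helper shown below (hypothetical caller code, not part of the patch; the tag spelling follows the TypeDescriptorTag idiom used in types.hpp, and the zero-fill behaviour for an integer tag is inferred from its callers in this patch):

    using Int64Tag = TypeDescriptorTag<DataTypeTag<DataType::INT64>, DimensionTag<Dimension::Dim0>>;
    std::vector<uint8_t> buf(100 * sizeof(int64_t));              // room for 100 rows
    util::default_initialize<Int64Tag>(buf.data(), buf.size());   // integer tag: zero-fills the buffer
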
template void default_initialize(uint8_t* data, size_t bytes) { using RawType = typename TagType::DataTypeTag::raw_type; diff --git a/python/test.py b/python/test.py new file mode 100644 index 0000000000..29127d28a3 --- /dev/null +++ b/python/test.py @@ -0,0 +1,7 @@ +from arcticdb import Arctic +import pandas as pd +import numpy as np +ac = Arctic("lmdb://test") +l = ac.get_library("test", create_if_missing=True) +df = pd.DataFrame({"col": [1,2,3]}) +l.write("test", df) \ No newline at end of file diff --git a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py index 7f706da764..20e7969b53 100644 --- a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py +++ b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py @@ -5,52 +5,69 @@ As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. """ +from math import nan import pandas as pd from pandas.testing import assert_frame_equal import numpy as np import pytest -@pytest.fixture -def int_dtype(): - bit_sizes = ["8", "16", "32", "64"] - base_numeric_types = ["int", "uint"] - for t in base_numeric_types: - for s in bit_sizes: - yield t + s +@pytest.fixture(params=[t + s for s in ["8", "16", "32", "64"] for t in ["int", "uint"]]) +def int_dtype(request): + yield request.param -@pytest.fixture -def float_dtype(): - bit_sizes = ["32", "64"] - for s in bit_sizes: - yield "float" + s +@pytest.fixture(params=["float" + s for s in ["32", "64"]]) +def float_dtype(request): + yield request.param -class TestEmptyColumnTypeAppend: - def test_can_append_non_empty_to_empty(self, lmdb_version_store_v2, int_dtype): - lmdb_version_store = lmdb_version_store_v2 + +# object is for nullable boolean +@pytest.fixture(params=["bool", "object"]) +def boolean_dtype(request): + yield request.param + + +class TestCanAppendToEmptyColumn: + + @pytest.fixture(autouse=True) + def create_empty_column(self, lmdb_version_store): df_empty = pd.DataFrame({"col1": [None, None]}) - lmdb_version_store.write("test_can_append_to_empty_column", df_empty) + lmdb_version_store.write("sym", df_empty) + yield + + def test_integer(self, lmdb_version_store, int_dtype): df_non_empty = pd.DataFrame({"col1": np.array([1,2,3], dtype=int_dtype)}) - lmdb_version_store.append("test_can_append_to_empty_column", df_non_empty) + lmdb_version_store.append("sym", df_non_empty) expected_result = pd.DataFrame({"col1": np.array([0,0,1,2,3], dtype=int_dtype)}) - assert_frame_equal(lmdb_version_store.read("test_can_append_to_empty_column").data, expected_result) + assert_frame_equal(lmdb_version_store.read("sym").data, expected_result) + + def test_float(self, lmdb_version_store, float_dtype): + df_non_empty = pd.DataFrame({"col1": np.array([1,2,3], dtype=float_dtype)}) + lmdb_version_store.append("sym", df_non_empty) + expected_result = pd.DataFrame({"col1": np.array([float("NaN"),float("NaN"),1,2,3], dtype=float_dtype)}) + assert_frame_equal(lmdb_version_store.read("sym").data, expected_result) + + def test_bool(self, lmdb_version_store, boolean_dtype): + df_non_empty = pd.DataFrame({"col1": np.array([True, False, True], dtype=boolean_dtype)}) + lmdb_version_store.append("sym", df_non_empty) + expected_result = pd.DataFrame({"col1": np.array([None, None, True, False, True], dtype=boolean_dtype)}) + assert_frame_equal(lmdb_version_store.read("sym").data, expected_result) + + def 
test_empty(self, lmdb_version_store): + df_non_empty = pd.DataFrame({"col1": np.array([None, None, None])}) + lmdb_version_store.append("sym", df_non_empty) + expected_result = pd.DataFrame({"col1": np.array([None, None, None, None, None])}) + assert_frame_equal(lmdb_version_store.read("sym").data, expected_result) + + def test_string(self, lmdb_version_store): + df_non_empty = pd.DataFrame({"col1": np.array(["some_string", "long_string"*100])}) + lmdb_version_store.append("sym", df_non_empty) + expected_result = pd.DataFrame({"col1": np.array([None, None, "some_string", "long_string"*100])}) + assert_frame_equal(lmdb_version_store.read("sym").data, expected_result) + + def test_date(self, lmdb_version_store): + df_non_empty = pd.DataFrame({"col1": np.array([np.datetime64('2005-02'), np.datetime64('2005-03'), np.datetime64('2005-03')])}) + lmdb_version_store.append("sym", df_non_empty) + expected_result = pd.DataFrame({"col1": np.array([np.datetime64('NaT'), np.datetime64('NaT'), np.datetime64('2005-02'), np.datetime64('2005-03'), np.datetime64('2005-03')], dtype="datetime64[ns]")}) + assert_frame_equal(lmdb_version_store.read("sym").data, expected_result) - @pytest.mark.xfail(reason="Not implemented") - def test_can_append_empty_to_non_empty(self, lmdb_version_store, element_type_constructor): - df_empty = pd.DataFrame({"col1": [None, None]}) - df_non_empty = pd.DataFrame({"col1": map(element_type_constructor, [1, 2, 3])}) - lmdb_version_store.write("test_can_append_to_empty_column", df_non_empty) - lmdb_version_store.append("test_can_append_to_empty_column", df_empty) - expected_result = pd.concat([df_non_empty, df_empty], ignore_index=True) - assert_frame_equal(lmdb_version_store.read("test_can_append_to_empty_column").data, expected_result) - - @pytest.mark.xfail(reason="Not implemented") - def test_can_append_empty_to_empty(self, lmdb_version_store, element_type_constructor): - df_empty_1 = pd.DataFrame({"col1": [None, None]}) - df_empty_2 = pd.DataFrame({"col1": []}) - df_empty_3 = pd.DataFrame({"col1": [None]}) - lmdb_version_store.write("test_can_append_to_empty_column", df_empty_1) - lmdb_version_store.append("test_can_append_to_empty_column", df_empty_2) - lmdb_version_store.append("test_can_append_to_empty_column", df_empty_3) - expected_result = pd.concat([df_empty_1, df_empty_2, df_empty_3], ignore_index=True) - assert_frame_equal(lmdb_version_store.read("test_can_append_to_empty_column").data, expected_result) From d350261e4ceaa515d3d687e56877d851b5e434cf Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Mon, 15 Jan 2024 10:36:17 +0200 Subject: [PATCH 03/30] Add a test for appending none to int --- .../version_store/test_empty_column_type.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py index 20e7969b53..bd9b03e45a 100644 --- a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py +++ b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py @@ -71,3 +71,16 @@ def test_date(self, lmdb_version_store): expected_result = pd.DataFrame({"col1": np.array([np.datetime64('NaT'), np.datetime64('NaT'), np.datetime64('2005-02'), np.datetime64('2005-03'), np.datetime64('2005-03')], dtype="datetime64[ns]")}) assert_frame_equal(lmdb_version_store.read("sym").data, expected_result) + +class TestCanAppendEmptyToColumn: + + def test_integer(self, lmdb_version_store_v2): + int_dtype = "int32" + lmdb_version_store = 
lmdb_version_store_v2 + df = pd.DataFrame({"col1": np.array([1,2,3], dtype=int_dtype)}) + lmdb_version_store.write("sym", df) + lmdb_version_store.append("sym", pd.DataFrame({"col1": [None, None]})) + lmdb_version_store.append("sym", pd.DataFrame({"col1": [None, None]})) + expected_result = pd.DataFrame({"col1": np.array([1,2,3,0,0,0,0], dtype=int_dtype)}) + print(lmdb_version_store.read("sym", row_range=[5,7]).data) + assert_frame_equal(lmdb_version_store.read("sym").data, expected_result) \ No newline at end of file From 8eb7253e2f023d50ced6df51baf4f88351037ba3 Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Mon, 22 Jan 2024 18:28:39 +0200 Subject: [PATCH 04/30] Another batch of fixes for the empty data type - Remove PYBOOL64 - Fix issues regarding appending empty to strings and strings to empty with dynamic schema. - Add tests for appending empty to all other types and all other types to empty --- cpp/arcticdb/CMakeLists.txt | 4 + cpp/arcticdb/column_store/column.cpp | 21 +- cpp/arcticdb/column_store/column.hpp | 2 +- cpp/arcticdb/column_store/column_utils.hpp | 8 +- cpp/arcticdb/entity/descriptors.hpp | 14 ++ cpp/arcticdb/entity/timeseries_descriptor.hpp | 1 - cpp/arcticdb/entity/type_utils.cpp | 160 +++++++++++++ cpp/arcticdb/entity/type_utils.hpp | 152 ++---------- cpp/arcticdb/entity/types-inl.hpp | 5 +- cpp/arcticdb/entity/types.cpp | 10 +- cpp/arcticdb/entity/types.hpp | 23 +- cpp/arcticdb/pipeline/column_mapping.cpp | 137 +++++++++++ cpp/arcticdb/pipeline/column_mapping.hpp | 152 +++--------- cpp/arcticdb/pipeline/frame_utils.hpp | 7 +- cpp/arcticdb/pipeline/read_frame.cpp | 193 ++++++++------- cpp/arcticdb/pipeline/write_frame.cpp | 15 +- cpp/arcticdb/processing/aggregation.cpp | 5 - cpp/arcticdb/python/python_handlers.cpp | 74 +++--- cpp/arcticdb/python/python_handlers.hpp | 43 ++-- cpp/arcticdb/python/python_module.cpp | 1 - cpp/arcticdb/stream/index.hpp | 2 +- cpp/arcticdb/util/sparse_utils.hpp | 17 +- cpp/arcticdb/util/type_handler.hpp | 17 +- python/tests/conftest.py | 33 ++- .../version_store/test_empty_column_type.py | 220 +++++++++++++++--- 25 files changed, 816 insertions(+), 500 deletions(-) create mode 100644 cpp/arcticdb/entity/descriptors.hpp create mode 100644 cpp/arcticdb/entity/type_utils.cpp create mode 100644 cpp/arcticdb/pipeline/column_mapping.cpp diff --git a/cpp/arcticdb/CMakeLists.txt b/cpp/arcticdb/CMakeLists.txt index 5b28097825..71e347ae62 100644 --- a/cpp/arcticdb/CMakeLists.txt +++ b/cpp/arcticdb/CMakeLists.txt @@ -195,6 +195,7 @@ set(arcticdb_srcs entity/key.hpp entity/metrics.hpp entity/data_error.hpp + entity/descriptors.hpp entity/native_tensor.hpp entity/performance_tracing.hpp entity/protobuf_mappings.hpp @@ -203,6 +204,7 @@ set(arcticdb_srcs entity/serialized_key.hpp entity/type_conversion.hpp entity/types.hpp + entity/type_utils.hpp entity/types-inl.hpp entity/variant_key.hpp entity/versioned_item.hpp @@ -370,7 +372,9 @@ set(arcticdb_srcs entity/metrics.cpp entity/performance_tracing.cpp entity/types.cpp + entity/type_utils.cpp log/log.cpp + pipeline/column_mapping.cpp pipeline/column_stats.cpp pipeline/frame_slice.cpp pipeline/frame_utils.cpp diff --git a/cpp/arcticdb/column_store/column.cpp b/cpp/arcticdb/column_store/column.cpp index 23ba7cf152..4d62a6b4d7 100644 --- a/cpp/arcticdb/column_store/column.cpp +++ b/cpp/arcticdb/column_store/column.cpp @@ -90,14 +90,14 @@ void Column::unsparsify(size_t num_rows) { if(!sparse_map_) return; - type_.visit_tag([that=this, num_rows] (const auto tdt) { + type_.visit_tag([this, num_rows] (const auto 
tdt) { using TagType = decltype(tdt); using RawType = typename TagType::DataTypeTag::raw_type; const auto dest_bytes = num_rows * sizeof(RawType); auto dest = ChunkedBuffer::presized(dest_bytes); util::default_initialize(dest.data(), dest_bytes); - util::expand_dense_buffer_using_bitmap(that->sparse_map_.value(), that->data_.buffer().data(), dest.data()); - std::swap(dest, that->data_.buffer()); + util::expand_dense_buffer_using_bitmap(sparse_map_.value(), data_.buffer().data(), dest.data()); + std::swap(dest, data_.buffer()); }); sparse_map_ = std::nullopt; last_logical_row_ = last_physical_row_ = static_cast(num_rows) - 1; @@ -106,14 +106,13 @@ void Column::unsparsify(size_t num_rows) { } void Column::sparsify() { - type().visit_tag([that=this](auto type_desc_tag) { - using TDT = decltype(type_desc_tag); - using RawType = typename TDT::DataTypeTag::raw_type; - if constexpr(is_floating_point_type(TDT::DataTypeTag::data_type)) { - auto raw_ptr =reinterpret_cast(that->ptr()); - auto buffer = util::scan_floating_point_to_sparse(raw_ptr, that->row_count(), that->sparse_map()); - std::swap(that->data().buffer(), buffer); - that->last_physical_row_ = that->sparse_map().count() - 1; + type().visit_tag([this](auto type_desc_tag) { + using RawType = typename decltype(type_desc_tag)::DataTypeTag::raw_type; + if constexpr (is_floating_point_type(type_desc_tag.data_type())) { + auto raw_ptr = reinterpret_cast(ptr()); + auto buffer = util::scan_floating_point_to_sparse(raw_ptr, row_count(), sparse_map()); + std::swap(data().buffer(), buffer); + last_physical_row_ = sparse_map().count() - 1; } }); } diff --git a/cpp/arcticdb/column_store/column.hpp b/cpp/arcticdb/column_store/column.hpp index de72357498..b8f69d1599 100644 --- a/cpp/arcticdb/column_store/column.hpp +++ b/cpp/arcticdb/column_store/column.hpp @@ -217,7 +217,7 @@ class Column { // it pointers to an array type (at numpy arrays at the moment). When we allocate a column for an array we // need to allocate space for one pointer per row. This also affects how we handle arrays to python as well. // Check cpp/arcticdb/column_store/column_utils.hpp::array_at and cpp/arcticdb/column_store/column.hpp::Column - data_(expected_rows * (type.dimension() == Dimension::Dim0 ? get_type_size(type.data_type()) : sizeof(void*)), presize), + data_(expected_rows * entity::sizeof_datatype(type), presize), type_(type), allow_sparse_(allow_sparse){ ARCTICDB_TRACE(log::inmem(), "Creating column with descriptor {}", type); diff --git a/cpp/arcticdb/column_store/column_utils.hpp b/cpp/arcticdb/column_store/column_utils.hpp index 71fc1192a9..351617a62b 100644 --- a/cpp/arcticdb/column_store/column_utils.hpp +++ b/cpp/arcticdb/column_store/column_utils.hpp @@ -53,9 +53,7 @@ inline py::array array_at(const SegmentInMemory& frame, std::size_t col_pos, py: // (numpy) array, thus the size of the element is not the size of the raw type, but the size of a pointer. // This also affects how we allocate columns. Check cpp/arcticdb/column_store/column.hpp::Column and // cpp/arcticdb/pipeline/column_mapping.hpp::sizeof_datatype - if constexpr(tag.dimension() > Dimension::Dim0) { - esize = sizeof(PyObject*); - } + esize = sizeof_datatype(TypeDescriptor{tag}); } else if constexpr(tag.dimension() == Dimension::Dim2) { util::raise_rte("Read resulted in two dimensional type. 
This is not supported."); } else { @@ -105,9 +103,7 @@ inline py::array array_at(const SegmentInMemory& frame, std::size_t col_pos, py: // (numpy) array, thus the size of the element is not the size of the raw type, but the size of a pointer. // This also affects how we allocate columns. Check cpp/arcticdb/column_store/column.hpp::Column and // cpp/arcticdb/pipeline/column_mapping.hpp::sizeof_datatype - if constexpr(tag.dimension() > Dimension::Dim0) { - esize = sizeof(PyObject*); - } + esize = sizeof_datatype(TypeDescriptor{tag}); } else if constexpr(tag.dimension() == Dimension::Dim2) { util::raise_rte("Read resulted in two dimensional type. This is not supported."); } else { diff --git a/cpp/arcticdb/entity/descriptors.hpp b/cpp/arcticdb/entity/descriptors.hpp new file mode 100644 index 0000000000..87ffaa041f --- /dev/null +++ b/cpp/arcticdb/entity/descriptors.hpp @@ -0,0 +1,14 @@ +/* Copyright 2024 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software + * will be governed by the Apache License, version 2.0. + */ + +#pragma once + +#include +namespace arcticdb::proto { + namespace descriptors = arcticc::pb2::descriptors_pb2; +} \ No newline at end of file diff --git a/cpp/arcticdb/entity/timeseries_descriptor.hpp b/cpp/arcticdb/entity/timeseries_descriptor.hpp index 806051f129..3802fa831d 100644 --- a/cpp/arcticdb/entity/timeseries_descriptor.hpp +++ b/cpp/arcticdb/entity/timeseries_descriptor.hpp @@ -7,7 +7,6 @@ #pragma once -#include #include #include diff --git a/cpp/arcticdb/entity/type_utils.cpp b/cpp/arcticdb/entity/type_utils.cpp new file mode 100644 index 0000000000..715939590d --- /dev/null +++ b/cpp/arcticdb/entity/type_utils.cpp @@ -0,0 +1,160 @@ +/* Copyright 2024 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software + * will be governed by the Apache License, version 2.0. + */ + +#include +#include + +namespace arcticdb { + bool trivially_compatible_types(const entity::TypeDescriptor& left, const entity::TypeDescriptor& right) { + if (left == right) + return true; + + // Multidimensional types are pointers + if (left.dimension() >= entity::Dimension::Dim1 && right.dimension() >= entity::Dimension::Dim1) + return true; + + // Multidimensional types are pointers the empty type is pointer as well + if (left.dimension() >= entity::Dimension::Dim1 && is_empty_type(right.data_type())) + return true; + + // Multidimensional types are pointers the empty type is pointer as well + if (right.dimension() >= entity::Dimension::Dim1 && is_empty_type(left.data_type())) + return true; + + if (is_sequence_type(left.data_type()) && is_sequence_type(right.data_type())) { + // TODO coercion of utf strings is not always safe, should allow safe conversion and reinstate the + // stronger requirement for trivial conversion below. 
+ // if(!is_utf_type(slice_value_type(left.data_type)) && + // !is_utf_type(slice_value_type(right.data_type))) + // return true; + + return is_utf_type(slice_value_type(left.data_type())) == is_utf_type(slice_value_type(right.data_type())); + } + + return false; + } + + std::optional has_valid_type_promotion( + const entity::TypeDescriptor& source, + const entity::TypeDescriptor& target + ) { + + if (source.dimension() != target.dimension()) { + // Empty of dimension 0 means lack of any given type and can be promoted to anything (even if the dimensions + // don't match), e.g. empty type can become int or array of ints. Empty type of higher dimension is used to + // specify an empty array or an empty matrix, thus it cannot become any other type unless the dimensionality + // matches + if (is_empty_type(source.data_type()) && source.dimension() == entity::Dimension::Dim0) + return target; + return std::nullopt; + } + + if (source == target) + return target; + + // Empty type is coercible to any type + if (is_empty_type(source.data_type())) { + return target; + } + + // Nothing is coercible to the empty type. + if (is_empty_type(target.data_type())) { + return std::nullopt; + } + + auto source_type = source.data_type(); + auto target_type = target.data_type(); + auto source_size = slice_bit_size(source_type); + auto target_size = slice_bit_size(target_type); + + if (is_time_type(source_type)) { + if (!is_time_type(target_type)) + return std::nullopt; + } else if (is_unsigned_type(source_type)) { + if (is_unsigned_type(target_type)) { + // UINT->UINT, target_size must be >= source_size + if (source_size > target_size) + return std::nullopt; + } else if (is_signed_type(target_type)) { + // UINT->INT, target_size must be > source_size + if (source_size >= target_size) + return std::nullopt; + } else if (is_floating_point_type(target_type)) { + // UINT->FLOAT, no restrictions on relative sizes + } else { + // Non-numeric target type + return std::nullopt; + } + } else if (is_signed_type(source_type)) { + if (is_unsigned_type(target_type)) { + // INT->UINT never promotable + return std::nullopt; + } else if (is_signed_type(target_type)) { + // INT->INT, target_size must be >= source_size + if (source_size > target_size) + return std::nullopt; + } else if (is_floating_point_type(target_type)) { + // INT->FLOAT, no restrictions on relative sizes + } else { + // Non-numeric target type + return std::nullopt; + } + } else if (is_floating_point_type(source_type)) { + if (is_unsigned_type(target_type) || is_signed_type(target_type)) { + // FLOAT->U/INT never promotable + return std::nullopt; + } else if (is_floating_point_type(target_type)) { + // FLOAT->FLOAT, target_size must be >= source_size + if (source_size > target_size) + return std::nullopt; + } else { + // Non-numeric target type + return std::nullopt; + } + } else if (is_sequence_type(source_type) && is_sequence_type(target_type)) { + // Only allow promotion with UTF strings, and only to dynamic (never to fixed width) + if (!is_utf_type(source_type) || !is_utf_type(target_type) || !is_dynamic_string_type(target_type)) + return std::nullopt; + } else if (is_py_bool_type(source_type)) { + return std::nullopt; + } else { + // Non-numeric source type + return std::nullopt; + } + + return entity::TypeDescriptor{ + combine_data_type(slice_value_type(target_type), target_size), + target.dimension()}; + } + + std::optional has_valid_type_promotion( + const proto::descriptors::TypeDescriptor& source, + const proto::descriptors::TypeDescriptor& target 
+ ) { + return has_valid_type_promotion(entity::type_desc_from_proto(source), entity::type_desc_from_proto(target)); + } + + std::optional has_valid_common_type( + const entity::TypeDescriptor& left, + const entity::TypeDescriptor& right + ) { + auto maybe_common_type = has_valid_type_promotion(left, right); + if (!maybe_common_type) { + maybe_common_type = has_valid_type_promotion(right, left); + } + return maybe_common_type; + } + + std::optional has_valid_common_type( + const proto::descriptors::TypeDescriptor& left, + const proto::descriptors::TypeDescriptor& right + ) { + return has_valid_common_type(entity::type_desc_from_proto(left), entity::type_desc_from_proto(right)); + } + +} \ No newline at end of file diff --git a/cpp/arcticdb/entity/type_utils.hpp b/cpp/arcticdb/entity/type_utils.hpp index 13804582f7..4ecc0f2606 100644 --- a/cpp/arcticdb/entity/type_utils.hpp +++ b/cpp/arcticdb/entity/type_utils.hpp @@ -6,146 +6,36 @@ */ #pragma once - -#include +#include +#include namespace arcticdb { -inline bool trivially_compatible_types(entity::TypeDescriptor left, entity::TypeDescriptor right) { - if(left == right) - return true; - - // Multidimensional types are pointers - if(left.dimension() >= entity::Dimension::Dim1 && right.dimension() >= entity::Dimension::Dim1) - return true; - - // Multidimensional types are pointer the empty type is pointer as well - if(left.dimension() >= entity::Dimension::Dim1 && is_empty_type(right.data_type())) - return true; - - // Multidimensional types are pointer the empty type is pointer as well - if(right.dimension() >= entity::Dimension::Dim1 && is_empty_type(left.data_type())) - return true; - - if(is_sequence_type(left.data_type()) && is_sequence_type(right.data_type())) { - //TODO coercion of utf strings is not always safe, should allow safe conversion and reinstate the - //stronger requirement for trivial conversion below. -// if(!is_utf_type(slice_value_type(left.data_type)) && !is_utf_type(slice_value_type(right.data_type))) -// return true; - - return is_utf_type(slice_value_type(left.data_type())) == is_utf_type(slice_value_type(right.data_type())); - } - - if(is_py_bool_type(right.data_type()) && is_py_bool_type(right.data_type())) { - return true; - } - - return false; +namespace entity { + struct TypeDescriptor; } -inline std::optional has_valid_type_promotion(entity::TypeDescriptor source, entity::TypeDescriptor target) { - - if(source.dimension() != target.dimension()) { - // Empty of dimension 0 means lack of any given type and can be promoted to anything (even if the dimensions don't - // match), e.g. empty type can become int or array of ints. Empty type of higher dimension is used to specify an - // empty array or an empty matrix, thus it cannot become any other type unless the dimensionality matches - if(is_empty_type(source.data_type()) && source.dimension() == entity::Dimension::Dim0) - return target; - return std::nullopt; - } - - if (source == target) - return target; +/// Two types are trivially compatible if their byte representation is exactly the same i.e. 
you can memcpy +/// n elements of left type from one buffer to n elements of type right in another buffer and get the same result +[[nodiscard]] bool trivially_compatible_types(const entity::TypeDescriptor& left, const entity::TypeDescriptor& right); - // Empty type is coercible to any type - if(is_empty_type(source.data_type())) { - return target; - } +[[nodiscard]] std::optional has_valid_type_promotion( + const entity::TypeDescriptor& source, + const entity::TypeDescriptor& target +); - // Nothing is coercible to the empty type. - if(is_empty_type(target.data_type())) { - return std::nullopt; - } - - auto source_type = source.data_type(); - auto target_type = target.data_type(); - auto source_size = slice_bit_size(source_type); - auto target_size = slice_bit_size(target_type); - - if (is_time_type(source_type)) { - if (!is_time_type(target_type)) - return std::nullopt; - } else if (is_unsigned_type(source_type)) { - if (is_unsigned_type(target_type)) { - // UINT->UINT, target_size must be >= source_size - if (source_size > target_size) - return std::nullopt; - } else if (is_signed_type(target_type)) { - // UINT->INT, target_size must be > source_size - if (source_size >= target_size) - return std::nullopt; - } else if (is_floating_point_type(target_type)) { - // UINT->FLOAT, no restrictions on relative sizes - } else { - // Non-numeric target type - return std::nullopt; - } - } else if (is_signed_type(source_type)) { - if (is_unsigned_type(target_type)) { - // INT->UINT never promotable - return std::nullopt; - } else if (is_signed_type(target_type)) { - // INT->INT, target_size must be >= source_size - if (source_size > target_size) - return std::nullopt; - } else if (is_floating_point_type(target_type)) { - // INT->FLOAT, no restrictions on relative sizes - } else { - // Non-numeric target type - return std::nullopt; - } - } else if (is_floating_point_type(source_type)) { - if (is_unsigned_type(target_type) || is_signed_type(target_type)) { - // FLOAT->U/INT never promotable - return std::nullopt; - } else if (is_floating_point_type(target_type)) { - // FLOAT->FLOAT, target_size must be >= source_size - if (source_size > target_size) - return std::nullopt; - } else { - // Non-numeric target type - return std::nullopt; - } - } else if (is_sequence_type(source_type) && is_sequence_type(target_type)) { - // Only allow promotion with UTF strings, and only to dynamic (never to fixed width) - if (!is_utf_type(source_type) || !is_utf_type(target_type) || !is_dynamic_string_type(target_type)) - return std::nullopt; - } else { - // Non-numeric source type - return std::nullopt; - } - - return entity::TypeDescriptor{combine_data_type(slice_value_type(target_type), target_size), target.dimension()}; -} - -inline std::optional has_valid_type_promotion( +[[nodiscard]] std::optional has_valid_type_promotion( const proto::descriptors::TypeDescriptor& source, - const proto::descriptors::TypeDescriptor& target) { - return has_valid_type_promotion(entity::type_desc_from_proto(source), entity::type_desc_from_proto(target)); -} + const proto::descriptors::TypeDescriptor& target +); -inline std::optional has_valid_common_type(entity::TypeDescriptor left, entity::TypeDescriptor right) { - auto maybe_common_type = has_valid_type_promotion(left, right); - if (!maybe_common_type) { - maybe_common_type = has_valid_type_promotion(right, left); - } - return maybe_common_type; -} +[[nodiscard]] std::optional has_valid_common_type( + const entity::TypeDescriptor& left, + const entity::TypeDescriptor& right +); 
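
A usage sketch for these declarations (hypothetical caller code, not part of the patch):

    const entity::TypeDescriptor src{entity::DataType::INT32, entity::Dimension::Dim0};
    const entity::TypeDescriptor tgt{entity::DataType::INT64, entity::Dimension::Dim0};
    // INT32 widens losslessly to INT64, so a promoted descriptor is returned
    std::optional<entity::TypeDescriptor> promoted = has_valid_type_promotion(src, tgt);
    // the "common type" query tries the promotion in both directions
    std::optional<entity::TypeDescriptor> common = has_valid_common_type(tgt, src);
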
-inline std::optional has_valid_common_type( +[[nodiscard]] std::optional has_valid_common_type( const proto::descriptors::TypeDescriptor& left, - const proto::descriptors::TypeDescriptor& right) { - return has_valid_common_type(entity::type_desc_from_proto(left), entity::type_desc_from_proto(right)); -} - + const proto::descriptors::TypeDescriptor& right +); } diff --git a/cpp/arcticdb/entity/types-inl.hpp b/cpp/arcticdb/entity/types-inl.hpp index d9d80c87e0..cb16fafbdf 100644 --- a/cpp/arcticdb/entity/types-inl.hpp +++ b/cpp/arcticdb/entity/types-inl.hpp @@ -18,7 +18,7 @@ namespace arcticdb::entity { namespace details { template -auto visit_dim(DataType dt, Callable &&c) { +constexpr auto visit_dim(DataType dt, Callable &&c) { switch (dt) { #define DT_CASE(__T__) case DataType::__T__: \ return c(TypeDescriptorTag, DimType>()); @@ -40,7 +40,6 @@ auto visit_dim(DataType dt, Callable &&c) { DT_CASE(UTF_DYNAMIC64) DT_CASE(EMPTYVAL) DT_CASE(PYBOOL8) - DT_CASE(PYBOOL64) #undef DT_CASE default: util::raise_rte("Invalid dtype '{}' in visit dim", datatype_to_str(dt)); } @@ -77,7 +76,7 @@ auto visit_type(DataType dt, Callable &&c) { } // namespace details template -auto TypeDescriptor::visit_tag(Callable &&callable) const { +constexpr auto TypeDescriptor::visit_tag(Callable &&callable) const { switch (dimension_) { case Dimension::Dim0: return details::visit_dim>(data_type_, callable); case Dimension::Dim1: return details::visit_dim>(data_type_, callable); diff --git a/cpp/arcticdb/entity/types.cpp b/cpp/arcticdb/entity/types.cpp index 5da45658bd..4f7593b23a 100644 --- a/cpp/arcticdb/entity/types.cpp +++ b/cpp/arcticdb/entity/types.cpp @@ -6,6 +6,7 @@ */ #include +#include namespace arcticdb::entity { @@ -37,10 +38,17 @@ std::string_view datatype_to_str(const DataType dt) { TO_STR(UTF_DYNAMIC64) TO_STR(EMPTYVAL) TO_STR(PYBOOL8) - TO_STR(PYBOOL64) #undef TO_STR default:return std::string_view("UNKNOWN"); } } +std::size_t sizeof_datatype(const TypeDescriptor& td) { + return td.visit_tag([](auto tdt) { + using RawType = typename std::decay_t::DataTypeTag::raw_type; + auto handler = TypeHandlerRegistry::instance()->get_handler(TypeDescriptor{tdt}); + return handler ? handler->type_size() : sizeof(RawType); + }); +} + } // namespace arcticdb diff --git a/cpp/arcticdb/entity/types.hpp b/cpp/arcticdb/entity/types.hpp index 771a03d083..2a16a30df1 100644 --- a/cpp/arcticdb/entity/types.hpp +++ b/cpp/arcticdb/entity/types.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -33,13 +34,6 @@ using ssize_t = SSIZE_T; #endif -// TODO: Forward declare further and move the inclusion of `descriptors.pb.h` in `types.cpp`. 
-#include - -namespace arcticdb::proto { - namespace descriptors = arcticc::pb2::descriptors_pb2; -} - namespace arcticdb::entity { enum class SortedValue : uint8_t { @@ -232,7 +226,6 @@ enum class DataType : uint8_t { UTF_DYNAMIC64 = detail::combine_val_bits(ValueType::UTF_DYNAMIC, SizeBits::S64), EMPTYVAL = detail::combine_val_bits(ValueType::EMPTY, SizeBits::S64), PYBOOL8 = detail::combine_val_bits(ValueType::PYBOOL, SizeBits::S8), - PYBOOL64 = detail::combine_val_bits(ValueType::PYBOOL, SizeBits::S64), UNKNOWN = 0, }; @@ -418,7 +411,6 @@ DATA_TYPE_TAG(UTF_FIXED64, std::uint64_t) DATA_TYPE_TAG(UTF_DYNAMIC64, std::uint64_t) DATA_TYPE_TAG(EMPTYVAL, std::uint64_t) DATA_TYPE_TAG(PYBOOL8, uint8_t) -DATA_TYPE_TAG(PYBOOL64, std::uint64_t) #undef DATA_TYPE_TAG enum class Dimension : uint8_t { @@ -474,7 +466,7 @@ struct TypeDescriptor { ARCTICDB_MOVE_COPY_DEFAULT(TypeDescriptor) template - auto visit_tag(Callable &&callable) const; + constexpr auto visit_tag(Callable &&callable) const; bool operator==(const TypeDescriptor &o) const { return data_type_ == o.data_type_ && dimension_ == o.dimension_; @@ -552,16 +544,20 @@ struct TypeDescriptorTag { using DataTypeTag = DT; using DimensionTag = D; explicit constexpr operator TypeDescriptor() const { - return TypeDescriptor{DataTypeTag::data_type, DimensionTag::value}; + return type_descriptor(); } - [[nodiscard]] constexpr Dimension dimension() const { + [[nodiscard]] static constexpr Dimension dimension() { return DimensionTag::value; } - [[nodiscard]] constexpr DataType data_type() const { + [[nodiscard]] static constexpr DataType data_type() { return DataTypeTag::data_type; } + + [[nodiscard]] static constexpr TypeDescriptor type_descriptor() { + return TypeDescriptor{DataTypeTag::data_type, DimensionTag::value}; + } }; inline DataType get_data_type(const arcticdb::proto::descriptors::TypeDescriptor& type_desc) { @@ -804,6 +800,7 @@ inline bool operator!=(const Field& l, const Field& r) { return !(l == r); } +std::size_t sizeof_datatype(const TypeDescriptor& td); } // namespace arcticdb::entity // StreamId ordering - numbers before strings diff --git a/cpp/arcticdb/pipeline/column_mapping.cpp b/cpp/arcticdb/pipeline/column_mapping.cpp new file mode 100644 index 0000000000..f348cb9f49 --- /dev/null +++ b/cpp/arcticdb/pipeline/column_mapping.cpp @@ -0,0 +1,137 @@ +/* Copyright 2024 Man Group Operations Limited + * + * Use of this software is governed by the Business Source License 1.1 included in the file licenses/BSL.txt. + * + * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software + * will be governed by the Apache License, version 2.0. 
+ */ + +#include +#include +#include + +namespace arcticdb { + ColumnMapping::ColumnMapping( + SegmentInMemory& frame, + size_t dst_col, + size_t field_col, + pipelines::PipelineContextRow& context + ) : + source_type_desc_(context.descriptor().fields(field_col).type()), + dest_type_desc_(frame.field(dst_col).type()), + frame_field_descriptor_(frame.field(dst_col)), + dest_size_(sizeof_datatype(dest_type_desc_)), + num_rows_(context.slice_and_key().slice_.row_range.diff()), + first_row_(context.slice_and_key().slice_.row_range.first - frame.offset()), + offset_bytes_(dest_size_ * first_row_), + dest_bytes_(dest_size_ * num_rows_) { + } + + StaticColumnMappingIterator::StaticColumnMappingIterator( + pipelines::PipelineContextRow& context, + size_t index_fieldcount + ) : + index_fieldcount_(index_fieldcount), + field_count_(context.slice_and_key().slice_.col_range.diff() + index_fieldcount), + first_slice_col_offset_(context.slice_and_key().slice_.col_range.first), + last_slice_col_offset_(context.slice_and_key().slice_.col_range.second), + bit_set_(context.get_selected_columns()) { + prev_col_offset_ = first_slice_col_offset_ - 1; + if (bit_set_) { + source_col_ = (*bit_set_)[bv_size(first_slice_col_offset_)] + ? first_slice_col_offset_ + : bit_set_->get_next(bv_size(first_slice_col_offset_)); + if ((*bit_set_)[bv_size(first_slice_col_offset_)]) { + source_col_ = first_slice_col_offset_; + } else { + auto next_pos = bit_set_->get_next(bv_size(first_slice_col_offset_)); + // We have to do this extra check in bitmagic, get_next returns 0 in case no next present + if (next_pos == 0 && bit_set_->size() > 0 && !bit_set_->test(0)) + invalid_ = true; + else + source_col_ = next_pos; + } + if (source_col_ < first_slice_col_offset_) + invalid_ = true; + + } else { + source_col_ = first_slice_col_offset_; + } + + dst_col_ = bit_set_ ? 
bit_set_->count_range(0, bv_size(source_col_)) - 1 : source_col_; + source_field_pos_ = (source_col_ - first_slice_col_offset_) + index_fieldcount_; + } + + std::optional StaticColumnMappingIterator::get_next_source_col() const { + if (!bit_set_) { + return source_col_ + 1; + } else { + auto next_pos = bit_set_->get_next(bv_size(source_col_)); + if (next_pos == 0) + return std::nullopt; + else + return next_pos; + } + } + + void StaticColumnMappingIterator::advance() { + ++dst_col_; + prev_col_offset_ = source_col_; + auto new_source_col = get_next_source_col(); + if (new_source_col) { + source_col_ = *new_source_col; + source_field_pos_ = (source_col_ - first_slice_col_offset_) + index_fieldcount_; + } else { + source_field_pos_ = field_count_; + source_col_ = last_slice_col_offset_; + } + } + + bool StaticColumnMappingIterator::invalid() const { + return invalid_; + } + + bool StaticColumnMappingIterator::has_next() const { + return source_field_pos_ < field_count_; + } + + bool StaticColumnMappingIterator::at_end_of_selected() const { + return !source_col_ || source_col_ >= last_slice_col_offset_; + } + + size_t StaticColumnMappingIterator::remaining_fields() const { + return field_count_ - source_field_pos_; + } + + size_t StaticColumnMappingIterator::prev_col_offset() const { + return prev_col_offset_; + } + + size_t StaticColumnMappingIterator::source_field_pos() const { + return source_field_pos_; + } + + size_t StaticColumnMappingIterator::source_col() const { + return source_col_; + } + + size_t StaticColumnMappingIterator::first_slice_col_offset() const { + return first_slice_col_offset_; + } + + size_t StaticColumnMappingIterator::last_slice_col_offset() const { + return last_slice_col_offset_; + } + + size_t StaticColumnMappingIterator::dest_col() const { + return dst_col_; + } + + size_t StaticColumnMappingIterator::field_count() const { + return field_count_; + } + + size_t StaticColumnMappingIterator::index_fieldcount() const { + return index_fieldcount_; + } +} // namespace arcticdb \ No newline at end of file diff --git a/cpp/arcticdb/pipeline/column_mapping.hpp b/cpp/arcticdb/pipeline/column_mapping.hpp index 3279ea7493..1f923623c2 100644 --- a/cpp/arcticdb/pipeline/column_mapping.hpp +++ b/cpp/arcticdb/pipeline/column_mapping.hpp @@ -5,41 +5,29 @@ * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. */ +#pragma once #include +#include + +namespace arcticdb::pipelines { + struct PipelineContextRow; +} namespace arcticdb { -inline std::size_t sizeof_datatype(const TypeDescriptor &td) { - return td.visit_tag([](auto dt) { - using RawType = typename std::decay_t::DataTypeTag::raw_type; - // Array types are stored on disk as flat sequences. The python layer cannot work with this. We need to pass - // it pointers to an array type (at numpy arrays at the moment). When we allocate a column for an array we - // need to allocate space for one pointer per row. This also affects how we handle arrays to python as well. - // Check cpp/arcticdb/column_store/column_utils.hpp::array_at and cpp/arcticdb/column_store/column.hpp - return dt.dimension() == Dimension::Dim0 ? 
sizeof(RawType) : sizeof(void*); - }); -} +class SegmentInMemory; struct ColumnMapping { - const TypeDescriptor source_type_desc_; - const TypeDescriptor dest_type_desc_; - const Field& frame_field_descriptor_; + const entity::TypeDescriptor source_type_desc_; + const entity::TypeDescriptor dest_type_desc_; + const entity::Field& frame_field_descriptor_; const size_t dest_size_; const size_t num_rows_; const size_t first_row_; const size_t offset_bytes_; const size_t dest_bytes_; - ColumnMapping(SegmentInMemory &frame, size_t dst_col, size_t field_col, pipelines::PipelineContextRow &context) : - source_type_desc_(context.descriptor().fields(field_col).type()), - dest_type_desc_(frame.field(dst_col).type()), - frame_field_descriptor_(frame.field(dst_col)), - dest_size_(sizeof_datatype(dest_type_desc_)), - num_rows_(context.slice_and_key().slice_.row_range.diff()), - first_row_(context.slice_and_key().slice_.row_range.first - frame.offset()), - offset_bytes_(dest_size_ * first_row_), - dest_bytes_(dest_size_ * num_rows_) { - } + ColumnMapping(SegmentInMemory& frame, size_t dst_col, size_t field_col, pipelines::PipelineContextRow& context); }; struct StaticColumnMappingIterator { @@ -53,109 +41,23 @@ struct StaticColumnMappingIterator { size_t dst_col_ = 0; bool invalid_ = false; const std::optional& bit_set_; - - StaticColumnMappingIterator(pipelines::PipelineContextRow& context, size_t index_fieldcount) : - index_fieldcount_(index_fieldcount), - field_count_(context.slice_and_key().slice_.col_range.diff() + index_fieldcount), - first_slice_col_offset_(context.slice_and_key().slice_.col_range.first), - last_slice_col_offset_(context.slice_and_key().slice_.col_range.second), - bit_set_(context.get_selected_columns()) { - prev_col_offset_ = first_slice_col_offset_ - 1; - if(bit_set_) { - source_col_ = (*bit_set_)[bv_size(first_slice_col_offset_)] ? first_slice_col_offset_ : bit_set_->get_next(bv_size(first_slice_col_offset_)); - if ((*bit_set_)[bv_size(first_slice_col_offset_)]) { - source_col_ = first_slice_col_offset_; - } else { - auto next_pos = bit_set_->get_next(bv_size(first_slice_col_offset_)); - // We have to do this extra check in bitmagic, get_next returns 0 in case no next present - if (next_pos == 0 && bit_set_->size() > 0 && !bit_set_->test(0)) - invalid_ = true; - else - source_col_ = next_pos; - } - if(source_col_ < first_slice_col_offset_) - invalid_ = true; - - } else { - source_col_ = first_slice_col_offset_; - } - - dst_col_ = bit_set_ ? 
bit_set_->count_range(0, bv_size(source_col_)) - 1 : source_col_; - source_field_pos_ = (source_col_ - first_slice_col_offset_) + index_fieldcount_; - } - - std::optional get_next_source_col() const { - if(!bit_set_) { - return source_col_ + 1; - } else { - auto next_pos = bit_set_->get_next(bv_size(source_col_)); - if(next_pos == 0) - return std::nullopt; - else - return next_pos; - } - } - - void advance() { - ++dst_col_; - prev_col_offset_ = source_col_; - auto new_source_col = get_next_source_col(); - if(new_source_col) { - source_col_ = *new_source_col; - source_field_pos_ = (source_col_ - first_slice_col_offset_) + index_fieldcount_; - } else { - source_field_pos_ = field_count_; - source_col_ = last_slice_col_offset_; - } - } - - bool invalid() const { - return invalid_; - } - - bool has_next() const { - return source_field_pos_ < field_count_; - } - - bool at_end_of_selected() const { - return !source_col_ || source_col_ >= last_slice_col_offset_; - } - - size_t remaining_fields() const { - return field_count_ - source_field_pos_; - } - - size_t prev_col_offset() const { - return prev_col_offset_; - } - - size_t source_field_pos() const { - return source_field_pos_; - } - - size_t source_col() const { - return source_col_; - } - - size_t first_slice_col_offset() const { - return first_slice_col_offset_; - } - - size_t last_slice_col_offset() const { - return last_slice_col_offset_; - } - - size_t dest_col() const { - return dst_col_; - } - - size_t field_count() const { - return field_count_; - } - size_t index_fieldcount() const { - return index_fieldcount_; - } + StaticColumnMappingIterator(pipelines::PipelineContextRow& context, size_t index_fieldcount); + + void advance(); + [[nodiscard]] std::optional get_next_source_col() const; + [[nodiscard]] bool invalid() const; + [[nodiscard]] bool has_next() const; + [[nodiscard]] bool at_end_of_selected() const; + [[nodiscard]] size_t remaining_fields() const; + [[nodiscard]] size_t prev_col_offset() const; + [[nodiscard]] size_t source_field_pos() const; + [[nodiscard]] size_t source_col() const; + [[nodiscard]] size_t first_slice_col_offset() const; + [[nodiscard]] size_t last_slice_col_offset() const; + [[nodiscard]] size_t dest_col() const; + [[nodiscard]] size_t field_count() const; + [[nodiscard]] size_t index_fieldcount() const; }; diff --git a/cpp/arcticdb/pipeline/frame_utils.hpp b/cpp/arcticdb/pipeline/frame_utils.hpp index 11074a77b6..c764063509 100644 --- a/cpp/arcticdb/pipeline/frame_utils.hpp +++ b/cpp/arcticdb/pipeline/frame_utils.hpp @@ -209,12 +209,9 @@ std::optional aggregator_set_data( const auto num_values = bitset.count(); auto bool_buffer = ChunkedBuffer::presized(num_values * sizeof(uint8_t)); - auto en = bitset.first(); - auto en_end = bitset.end(); auto bool_ptr = bool_buffer.ptr_cast(0u, num_values); - while (en < en_end) { - *bool_ptr = static_cast(PyObject_IsTrue(ptr_data[*en])); - ++en; + for (auto it = bitset.first(); it < bitset.end(); ++it) { + *bool_ptr = static_cast(PyObject_IsTrue(ptr_data[*it])); ++bool_ptr; } if(bitset.count() > 0) diff --git a/cpp/arcticdb/pipeline/read_frame.cpp b/cpp/arcticdb/pipeline/read_frame.cpp index e8f2e90061..d62a037d17 100644 --- a/cpp/arcticdb/pipeline/read_frame.cpp +++ b/cpp/arcticdb/pipeline/read_frame.cpp @@ -56,27 +56,14 @@ void mark_index_slices( column_groups).value(); } -StreamDescriptor modify_output_sizes(StreamDescriptor&& desc) { - for(auto& field : desc.mutable_fields()) { - if(is_py_bool_type(field.type().data_type())) { - 
field.mutable_type_desc()->set_size_bits(SizeBits::S64); - } - } - return std::move(desc); -} - StreamDescriptor get_filtered_descriptor(StreamDescriptor&& descriptor, const std::shared_ptr& filter_columns) { // We assume here that filter_columns_ will always contain the index. auto desc = std::move(descriptor); auto index = stream::index_type_from_descriptor(desc); return util::variant_match(index, [&desc, &filter_columns] (const auto& idx) { - if(filter_columns) { - return modify_output_sizes(StreamDescriptor{index_descriptor(desc.id(), idx, *filter_columns)}); - } - else { - return modify_output_sizes(StreamDescriptor{index_descriptor(desc.id(), idx, *desc.fields_ptr())}); - } + const std::shared_ptr& fields = filter_columns ? filter_columns : desc.fields_ptr(); + return StreamDescriptor{index_descriptor(desc.id(), idx, *fields)}; }); } @@ -225,21 +212,20 @@ void decode_or_expand_impl( const uint8_t*& data, uint8_t* dest, const EncodedFieldType& encoded_field_info, - const TypeDescriptor& source_type_descriptor, - const TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr buffers, - EncodingVersion encding_version) { - if(auto handler = TypeHandlerRegistry::instance()->get_handler(source_type_descriptor); handler) { + EncodingVersion encding_version, + const ColumnMapping& m +) { + if (auto handler = TypeHandlerRegistry::instance()->get_handler(m.source_type_desc_); handler) { handler->handle_type( data, dest, VariantField{&encoded_field_info}, - source_type_descriptor, - destination_type_descriptor, dest_bytes, std::move(buffers), - encding_version + encding_version, + m ); } else { std::optional bv; @@ -248,8 +234,8 @@ void decode_or_expand_impl( const auto bytes = encoding_sizes::data_uncompressed_size(ndarray); ChunkedBuffer sparse{bytes}; SliceDataSink sparse_sink{sparse.data(), bytes}; - data += decode_field(source_type_descriptor, encoded_field_info, data, sparse_sink, bv, encding_version); - source_type_descriptor.visit_tag([dest, dest_bytes, &bv, &sparse](const auto tdt) { + data += decode_field(m.source_type_desc_, encoded_field_info, data, sparse_sink, bv, encding_version); + m.source_type_desc_.visit_tag([dest, dest_bytes, &bv, &sparse](const auto tdt) { using TagType = decltype(tdt); using RawType = typename TagType::DataTypeTag::raw_type; util::default_initialize(dest, dest_bytes); @@ -259,12 +245,12 @@ void decode_or_expand_impl( SliceDataSink sink(dest, dest_bytes); const auto &ndarray = encoded_field_info.ndarray(); if (const auto bytes = encoding_sizes::data_uncompressed_size(ndarray); bytes < dest_bytes) { - source_type_descriptor.visit_tag([dest, bytes, dest_bytes](const auto tdt) { + m.source_type_desc_.visit_tag([dest, bytes, dest_bytes](const auto tdt) { using TagType = decltype(tdt); util::default_initialize(dest + bytes, dest_bytes - bytes); }); } - data += decode_field(source_type_descriptor, encoded_field_info, data, sink, bv, encding_version); + data += decode_field(m.source_type_desc_, encoded_field_info, data, sink, bv, encding_version); } } } @@ -289,22 +275,20 @@ void decode_or_expand( const uint8_t*& data, uint8_t* dest, const VariantField& variant_field, - const TypeDescriptor& source_type_descriptor, - const TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr buffers, - EncodingVersion encoding_version + EncodingVersion encoding_version, + const ColumnMapping& m ) { util::variant_match(variant_field, [&](auto field) { decode_or_expand_impl( data, dest, *field, - source_type_descriptor, - 
destination_type_descriptor, dest_bytes, buffers, - encoding_version + encoding_version, + m ); }); } @@ -391,23 +375,29 @@ void decode_into_frame_static( ColumnMapping m{frame, it.dest_col(), it.source_field_pos(), context}; const bool types_trivially_compatible = trivially_compatible_types(m.source_type_desc_, m.dest_type_desc_); const bool any_type_is_empty = is_empty_type(m.source_type_desc_.data_type()) || is_empty_type(m.dest_type_desc_.data_type()); - util::check(types_trivially_compatible || any_type_is_empty, "Column type conversion from {} to {} not implemented in column {}:{} -> {}:{}", - m.source_type_desc_, - m.dest_type_desc_, - it.source_col(), - field_name, - it.dest_col(), - m.frame_field_descriptor_.name()); - util::check(data != end || remaining_fields_empty(it, context), "Reached end of input block with {} fields to decode", it.remaining_fields()); + util::check( + types_trivially_compatible || any_type_is_empty, + "Column type conversion from {} to {} not implemented in column {}:{} -> {}:{}", + m.source_type_desc_, + m.dest_type_desc_, + it.source_col(), + field_name, + it.dest_col(), + m.frame_field_descriptor_.name() + ); + util::check( + data != end || remaining_fields_empty(it, context), + "Reached end of input block with {} fields to decode", + it.remaining_fields() + ); decode_or_expand( data, buffer.data() + m.offset_bytes_, encoded_field, - m.source_type_desc_, - m.dest_type_desc_, m.dest_bytes_, buffers, - encoding_version + encoding_version, + m ); ARCTICDB_TRACE(log::codec(), "Decoded column {} to position {}", field_name, data - begin); @@ -434,10 +424,11 @@ void decode_into_frame_static( } void decode_into_frame_dynamic( - SegmentInMemory &frame, - PipelineContextRow &context, - Segment &&s, - const std::shared_ptr& buffers) { + SegmentInMemory& frame, + PipelineContextRow& context, + Segment&& s, + const std::shared_ptr& buffers +) { ARCTICDB_SAMPLE_DEFAULT(DecodeIntoFrame) auto seg = std::move(s); const uint8_t *data = seg.buffer().data(); @@ -450,9 +441,11 @@ void decode_into_frame_dynamic( context.set_compacted(hdr.compacted()); const EncodingVersion encdoing_version = EncodingVersion(hdr.encoding_version()); const bool has_magic_numbers = encdoing_version == EncodingVersion::V2; + VariantEncodedFieldCollection fields(seg); - if (data != end) { - VariantEncodedFieldCollection fields(seg); + // data == end in case we have empty data types (e.g. {EMPTYVAL, Dim0}, {EMPTYVAL, Dim1}) for which we store nothing + // in storage as they can be reconstructed in the type handler on the read path. 
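+    // Illustrative aside (not part of the original change): writing a frame such as
+    //     pd.DataFrame({"col1": [None, None]})
+    // produces a column typed {EMPTYVAL, Dim0} whose data section is empty, so this
+    // point can be reached with data == end while fields.size() > 0; the values are
+    // then backfilled by the empty-type handler instead of being decoded from disk.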
+ if (data != end || fields.size() > 0) { auto index_field = fields.at(0u); decode_index_field(frame, index_field, data, begin, end, context, encdoing_version); @@ -469,53 +462,85 @@ void decode_into_frame_dynamic( auto dst_col = *frame_loc_opt; auto& buffer = frame.column(static_cast(dst_col)).data().buffer(); - if(ColumnMapping m{frame, dst_col, field_col, context};!trivially_compatible_types(m.source_type_desc_, m.dest_type_desc_)) { - util::check(static_cast(has_valid_type_promotion(m.source_type_desc_, m.dest_type_desc_)), "Can't promote type {} to type {} in field {}", - m.source_type_desc_, m.dest_type_desc_, m.frame_field_descriptor_.name()); - - m.dest_type_desc_.visit_tag([&buffer, &m, &data, encoded_field, buffers, encdoing_version] (auto dest_desc_tag) { - using DestinationType = typename decltype(dest_desc_tag)::DataTypeTag::raw_type; - m.source_type_desc_.visit_tag([&buffer, &m, &data, &encoded_field, &buffers, encdoing_version] (auto src_desc_tag ) { - using SourceType = typename decltype(src_desc_tag)::DataTypeTag::raw_type; - if constexpr (std::is_arithmetic_v && std::is_arithmetic_v) { - const auto src_bytes = sizeof_datatype(m.source_type_desc_) * m.num_rows_; + const ColumnMapping m{frame, dst_col, field_col, context}; + if(!trivially_compatible_types(m.source_type_desc_, m.dest_type_desc_)) { + util::check( + static_cast(has_valid_type_promotion(m.source_type_desc_, m.dest_type_desc_)), + "Can't promote type {} to type {} in field {}", + m.source_type_desc_, + m.dest_type_desc_, + m.frame_field_descriptor_.name() + ); + m.dest_type_desc_.visit_tag([&](auto dest_desc_tag) { + using DestinationType = typename decltype(dest_desc_tag)::DataTypeTag::raw_type; + m.source_type_desc_.visit_tag([&](auto src_desc_tag) { + using SourceType = typename decltype(src_desc_tag)::DataTypeTag::raw_type; + if constexpr (std::is_arithmetic_v && std::is_arithmetic_v) { + const auto src_bytes = sizeof_datatype(m.source_type_desc_) * m.num_rows_; + auto dest_ptr = reinterpret_cast(buffer.data() + m.offset_bytes_); + if constexpr (is_empty_type(src_desc_tag.data_type())) { + decode_or_expand( + data, + reinterpret_cast(dest_ptr), + encoded_field, + src_bytes, + buffers, + encdoing_version, + m + ); + } else { Buffer tmp_buf{src_bytes}; decode_or_expand( data, tmp_buf.data(), encoded_field, - m.source_type_desc_, - m.dest_type_desc_, src_bytes, buffers, - encdoing_version + encdoing_version, + m ); - auto src_ptr = reinterpret_cast(tmp_buf.data()); - auto dest_ptr = reinterpret_cast(buffer.data() + m.offset_bytes_); + auto src_ptr = reinterpret_cast(tmp_buf.data()); for (auto i = 0u; i < m.num_rows_; ++i) { *dest_ptr++ = static_cast(*src_ptr++); } - } else { - util::raise_rte("Can't promote type {} to type {} in field {}", m.source_type_desc_, m.dest_type_desc_, m.frame_field_descriptor_.name()); } - }); + } else { + util::raise_rte( + "Can't promote type {} to type {} in field {}", + m.source_type_desc_, + m.dest_type_desc_, + m.frame_field_descriptor_.name() + ); + } }); - ARCTICDB_TRACE(log::codec(), "Decoded column {} to position {}", m.frame_field_descriptor_.name(), data - begin); + }); + ARCTICDB_TRACE( + log::codec(), + "Decoded column {} to position {}", + m.frame_field_descriptor_.name(), + data - begin + ); } else { - ARCTICDB_TRACE(log::storage(), "Creating data slice at {} with total size {} ({} rows)", m.offset_bytes_, m.dest_bytes_, - context.slice_and_key().slice_.row_range.diff()); - util::check(data != end, - "Reached end of input block with {} fields to decode", - 
field_count - field_col);
+                        ARCTICDB_TRACE(
+                            log::storage(),
+                            "Creating data slice at {} with total size {} ({} rows)",
+                            m.offset_bytes_,
+                            m.dest_bytes_,
+                            context.slice_and_key().slice_.row_range.diff()
+                        );
+                        util::check(
+                            data != end || is_empty_type(m.source_type_desc_.data_type()),
+                            "Reached end of input block with {} fields to decode",
+                            field_count - field_col
+                        );
                         decode_or_expand(
                             data,
                             buffer.data() + m.offset_bytes_,
                             encoded_field,
-                            m.source_type_desc_,
-                            m.dest_type_desc_,
                             m.dest_bytes_,
                             buffers,
-                            encdoing_version
+                            encdoing_version,
+                            m
                         );
                     }
                     ARCTICDB_TRACE(log::codec(), "Decoded column {} to position {}", frame.field(dst_col).name(), data - begin);
@@ -1004,18 +1029,24 @@ class DynamicStringReducer : public StringReducer {
         const auto& segment_descriptor = context_row.descriptor();
         const auto& segment_field = segment_descriptor[column_index];

-        auto has_type_conversion = frame_field_.type() != segment_field.type();
-        util::check(!has_type_conversion || trivially_compatible_types(frame_field_.type(), segment_field.type()),
-                    "Cannot convert from type {} to {} in frame field", frame_field_.type(), segment_field.type());
+        const bool trivially_compatible = trivially_compatible_types(frame_field_.type(), segment_field.type());
+        // In case the segment type is EMPTYVAL the empty type handler should have run and set all missing values
+        // to not_a_string()
+        util::check(trivially_compatible || is_empty_type(segment_field.type().data_type()),
+            "String types are not trivially compatible. Cannot convert from type {} to {} in frame field.",
+            frame_field_.type(),
+            segment_field.type()
+        );

         auto is_utf = is_utf_type(slice_value_type(frame_field_.type().data_type()));
         size_t end = context_row.slice_and_key().slice_.row_range.second - frame_.offset();
+        const auto is_type_different = frame_field_.type() != segment_field.type();

         auto ptr_src = get_offset_ptr_at(row_, src_buffer_);
         if(do_lock_)
-            process_string_views<true>(has_type_conversion, is_utf, end, ptr_src, context_row.string_pool());
+            process_string_views<true>(is_type_different, is_utf, end, ptr_src, context_row.string_pool());
         else
-            process_string_views<false>(has_type_conversion, is_utf, end, ptr_src, context_row.string_pool());
+            process_string_views<false>(is_type_different, is_utf, end, ptr_src, context_row.string_pool());
     }

     void finalize() override {
diff --git a/cpp/arcticdb/pipeline/write_frame.cpp b/cpp/arcticdb/pipeline/write_frame.cpp
index 263e8aef0b..a3dfe27197 100644
--- a/cpp/arcticdb/pipeline/write_frame.cpp
+++ b/cpp/arcticdb/pipeline/write_frame.cpp
@@ -193,13 +193,24 @@ folly::Future<entity::AtomKey> append_frame(
     slices_to_write.insert(std::end(slices_to_write), std::make_move_iterator(std::begin(slice_and_keys_to_append)), std::make_move_iterator(std::end(slice_and_keys_to_append)));
     std::sort(std::begin(slices_to_write), std::end(slices_to_write));
     if(dynamic_schema) {
-        auto merged_descriptor =
-            merge_descriptors(frame.desc, std::vector<std::shared_ptr<FieldCollection>>{ index_segment_reader.tsd().fields_ptr()}, {});
+        auto merged_descriptor = merge_descriptors(
+            frame.desc,
+            std::vector<std::shared_ptr<FieldCollection>>{index_segment_reader.tsd().fields_ptr()},
+            {}
+        );
         merged_descriptor.set_sorted(deduce_sorted(index_segment_reader.get_sorted(), frame.desc.get_sorted()));
         auto tsd = make_timeseries_descriptor(frame.num_rows + frame.offset, std::move(merged_descriptor), std::move(frame.norm_meta), std::move(frame.user_meta), std::nullopt, std::nullopt, frame.bucketize_dynamic);
         return index::write_index(stream::index_type_from_descriptor(frame.desc), std::move(tsd),
std::move(slices_to_write), key, store); } else { + const FieldCollection& new_fields{index_segment_reader.tsd().fields()}; + for (int i = 0; i < new_fields.size(); ++i) { + const Field& new_field = new_fields.at(i); + TypeDescriptor& original_type = frame.desc.mutable_field(i).mutable_type(); + if (is_empty_type(original_type.data_type()) && !is_empty_type(new_field.type().data_type())) { + original_type = new_field.type(); + } + } frame.desc.set_sorted(deduce_sorted(index_segment_reader.get_sorted(), frame.desc.get_sorted())); return index::write_index(std::move(frame), std::move(slices_to_write), key, store); } diff --git a/cpp/arcticdb/processing/aggregation.cpp b/cpp/arcticdb/processing/aggregation.cpp index 11d8b61aba..14368b8edf 100644 --- a/cpp/arcticdb/processing/aggregation.cpp +++ b/cpp/arcticdb/processing/aggregation.cpp @@ -142,11 +142,6 @@ namespace struct OutputType, void> { using type = ScalarTagType>; }; - - template<> - struct OutputType, void> { - using type = ScalarTagType>; - }; } /********************** diff --git a/cpp/arcticdb/python/python_handlers.cpp b/cpp/arcticdb/python/python_handlers.cpp index b549083444..2f8d343c3a 100644 --- a/cpp/arcticdb/python/python_handlers.cpp +++ b/cpp/arcticdb/python/python_handlers.cpp @@ -61,37 +61,42 @@ namespace arcticdb { const uint8_t*& input, uint8_t* dest, const VariantField& variant_field, - const entity::TypeDescriptor& source_type_descriptor, - const entity::TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr, - EncodingVersion + EncodingVersion, + const ColumnMapping& m ) { ARCTICDB_SAMPLE(HandleEmpty, 0) util::check(dest != nullptr, "Got null destination pointer"); - const size_t num_rows = dest_bytes / destination_type_descriptor.get_type_byte_size(); + ARCTICDB_TRACE( + log::version(), + "Empty type handler invoked for source type: {}, destination type: {}, num rows: {}", + m.source_type_desc_, + m.dest_type_desc_, + m.num_rows_ + ); static_assert(get_type_size(DataType::EMPTYVAL) == sizeof(PyObject*)); // TODO: arcticdb::util::default_initialize does mostly the same thing // the main difference is that it does not write py::none directly // * Should use that? // * It doesn't handle nullable bools and arrays? How should they be handled? 
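+        // Review aside (summarising the visit_tag dispatch below; the code is authoritative):
+        //     sequence types          -> not_a_string() placeholder, patched to real strings later
+        //     other nullable types    -> Py_None
+        //     floating point          -> quiet NaN
+        //     integer and plain bool  -> zero-filled
+        //     time types              -> NaT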
- destination_type_descriptor.visit_tag([&](auto tag) { + m.dest_type_desc_.visit_tag([&](auto tag) { using RawType = typename decltype(tag)::DataTypeTag::raw_type; RawType* ptr_dest = reinterpret_cast(dest); if constexpr (is_sequence_type(tag.data_type())) { // not_a_string() is set here as strings are offset in the string pool // later in fix_and_reduce() the offsets are replaced by a real python strings // TODO: It would be best if the type handler sets this to py::none - std::fill_n(ptr_dest, num_rows, arcticdb::not_a_string()); + std::fill_n(ptr_dest, m.num_rows_, arcticdb::not_a_string()); } else if constexpr (is_nullable_type(TypeDescriptor(tag))) { - fill_with_none(reinterpret_cast(dest), num_rows); + fill_with_none(reinterpret_cast(dest), m.num_rows_); } else if constexpr (is_floating_point_type(tag.data_type())) { - std::fill_n(ptr_dest, num_rows, std::numeric_limits::quiet_NaN()); + std::fill_n(ptr_dest, m.num_rows_, std::numeric_limits::quiet_NaN()); } else if constexpr (is_integer_type(tag.data_type()) || is_bool_type(tag.data_type())) { - std::memset(dest, 0, sizeof(RawType) * num_rows); + std::memset(dest, 0, sizeof(RawType) * m.num_rows_); } else if constexpr (is_time_type(tag.data_type())) { - std::fill_n(ptr_dest, num_rows, arcticdb::util::NaT); + std::fill_n(ptr_dest, m.num_rows_, arcticdb::util::NaT); } else { static_assert(sizeof(tag) == 0, "Unhandled data_type"); } @@ -116,27 +121,29 @@ namespace arcticdb { }); } + int EmptyHandler::type_size() const { + return sizeof(PyObject*); + } + void BoolHandler::handle_type( - const uint8_t*& data, - uint8_t* dest, - const VariantField& encoded_field_info, - const entity::TypeDescriptor& source_type_descriptor, - const entity::TypeDescriptor& destination_type_descriptor, - size_t dest_bytes, - std::shared_ptr, - EncodingVersion encding_version + const uint8_t*& data, + uint8_t* dest, + const VariantField& encoded_field_info, + size_t dest_bytes, + std::shared_ptr, + EncodingVersion encding_version, + const ColumnMapping& m ) { std::visit([&](const auto& field){ ARCTICDB_SAMPLE(HandleBool, 0) util::check(dest != nullptr, "Got null destination pointer"); util::check(field->has_ndarray(), "Bool handler expected array"); ARCTICDB_DEBUG(log::version(), "Bool handler got encoded field: {}", field->DebugString()); - size_t row_count = dest_bytes / get_type_size(DataType::PYBOOL64); auto ptr_dest = reinterpret_cast(dest); if(!field->ndarray().sparse_map_bytes()) { ARCTICDB_DEBUG(log::version(), "Bool handler has no values"); - fill_with_none(ptr_dest, row_count); + fill_with_none(ptr_dest, m.num_rows_); return; } @@ -145,7 +152,7 @@ namespace arcticdb { const auto bytes = encoding_sizes::data_uncompressed_size(ndarray); auto sparse = ChunkedBuffer::presized(bytes); SliceDataSink sparse_sink{sparse.data(), bytes}; - data += decode_field(source_type_descriptor, *field, data, sparse_sink, bv, encding_version); + data += decode_field(m.source_type_desc_, *field, data, sparse_sink, bv, encding_version); const auto num_bools = bv->count(); auto ptr_src = sparse.template ptr_cast(0, num_bools * sizeof(uint8_t)); auto last_row = 0u; @@ -160,22 +167,25 @@ namespace arcticdb { ++last_row; util::check(ptr_begin + last_row == ptr_dest, "Boolean pointer out of alignment {} != {}", last_row, uintptr_t(ptr_begin + last_row)); } - fill_with_none(ptr_dest, row_count - last_row); + fill_with_none(ptr_dest, m.num_rows_ - last_row); util::check(ptr_begin + last_row == ptr_dest, "Boolean pointer out of alignment at end: {} != {}", last_row, 
uintptr_t(ptr_begin + last_row)); }, encoded_field_info); } + int BoolHandler::type_size() const { + return sizeof(PyObject*); + } + std::mutex ArrayHandler::initialize_array_mutex; void ArrayHandler::handle_type( const uint8_t*& data, uint8_t* dest, const VariantField& encoded_field_info, - const entity::TypeDescriptor& source_type_descriptor, - const entity::TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr buffers, - EncodingVersion encoding_version + EncodingVersion encoding_version, + const ColumnMapping& m ) { util::variant_match(encoded_field_info, [&](auto field){ ARCTICDB_SAMPLE(HandleArray, 0) @@ -188,17 +198,17 @@ namespace arcticdb { fill_with_none(ptr_dest, row_count); return; } - std::shared_ptr column = buffers->get_buffer(source_type_descriptor, true); + std::shared_ptr column = buffers->get_buffer(m.source_type_desc_, true); column->check_magic(); log::version().info("Column got buffer at {}", uintptr_t(column.get())); auto bv = std::make_optional(util::BitSet{}); - data += decode_field(source_type_descriptor, *field, data, *column, bv, encoding_version); + data += decode_field(m.source_type_desc_, *field, data, *column, bv, encoding_version); auto last_row = 0u; ARCTICDB_SUBSAMPLE(InitArrayAcquireGIL, 0) - const auto strides = static_cast(get_type_size(source_type_descriptor.data_type())); - const py::dtype py_dtype = generate_python_dtype(source_type_descriptor, strides); - source_type_descriptor.visit_tag([&] (auto tdt) { + const auto strides = static_cast(get_type_size(m.source_type_desc_.data_type())); + const py::dtype py_dtype = generate_python_dtype(m.source_type_desc_, strides); + m.source_type_desc_.visit_tag([&] (auto tdt) { const auto& blocks = column->blocks(); if(blocks.empty()) return; @@ -236,4 +246,8 @@ namespace arcticdb { fill_with_none(ptr_dest, row_count - last_row); }); } + + int ArrayHandler::type_size() const { + return sizeof(PyObject*); + } } diff --git a/cpp/arcticdb/python/python_handlers.hpp b/cpp/arcticdb/python/python_handlers.hpp index 4c501bee5c..ac073ac69d 100644 --- a/cpp/arcticdb/python/python_handlers.hpp +++ b/cpp/arcticdb/python/python_handlers.hpp @@ -10,18 +10,21 @@ #include namespace arcticdb { - namespace py = pybind11; + struct ColumnMapping; + struct EmptyHandler { /// @see arcticdb::ITypeHandler void handle_type( const uint8_t*& data, uint8_t* dest, const VariantField& encoded_field, - const entity::TypeDescriptor& source_type_descriptor, - const entity::TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr buffers, - EncodingVersion encding_version); + EncodingVersion encding_version, + const ColumnMapping& columnMapping + ); + + int type_size() const; }; struct BoolHandler { @@ -30,26 +33,25 @@ namespace arcticdb { const uint8_t *&data, uint8_t *dest, const VariantField &encoded_field, - const entity::TypeDescriptor& source_type_descriptor, - const entity::TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr buffers, - EncodingVersion encding_version); + EncodingVersion encding_version, + const ColumnMapping& columnMapping + ); + int type_size() const; }; struct DecimalHandler { void handle_type( - const uint8_t*& data, - uint8_t* dest, - const VariantField& encoded_field, - const entity::TypeDescriptor& source_type_descriptor, - const entity::TypeDescriptor& destination_type_descriptor, - size_t dest_bytes, - std::shared_ptr buffers + const uint8_t*& data, + uint8_t* dest, + const VariantField& encoded_field, + size_t dest_bytes, + 
std::shared_ptr buffers, + EncodingVersion encding_version, + const ColumnMapping& m ); - - std::shared_ptr mutex_ = std::make_shared(); - std::shared_ptr decimal_ = std::make_shared(py::module_::import("decimal").attr("Decimal")); + int type_size() const; }; struct ArrayHandler { @@ -58,11 +60,12 @@ namespace arcticdb { const uint8_t*& data, uint8_t* dest, const VariantField& encoded_field, - const entity::TypeDescriptor& source_type_descriptor, - const entity::TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr buffers, - EncodingVersion encding_version); + EncodingVersion encding_version, + const ColumnMapping& columnMapping + ); + int type_size() const; static std::mutex initialize_array_mutex; }; } //namespace arcticdb diff --git a/cpp/arcticdb/python/python_module.cpp b/cpp/arcticdb/python/python_module.cpp index cd6c554a1c..cc061dc5fe 100644 --- a/cpp/arcticdb/python/python_module.cpp +++ b/cpp/arcticdb/python/python_module.cpp @@ -291,7 +291,6 @@ void register_type_handlers() { for(const DataType& data_type : allowed_array_types) { TypeHandlerRegistry::instance()->register_handler(TypeDescriptor{data_type, Dimension::Dim1}, arcticdb::ArrayHandler()); } - TypeHandlerRegistry::instance()->register_handler(TypeDescriptor{DataType::PYBOOL64, Dimension::Dim0}, arcticdb::BoolHandler()); TypeHandlerRegistry::instance()->register_handler(TypeDescriptor{DataType::PYBOOL8, Dimension::Dim0}, arcticdb::BoolHandler()); } diff --git a/cpp/arcticdb/stream/index.hpp b/cpp/arcticdb/stream/index.hpp index 370e83d67e..006b695f67 100644 --- a/cpp/arcticdb/stream/index.hpp +++ b/cpp/arcticdb/stream/index.hpp @@ -82,7 +82,7 @@ class TimeseriesIndex : public BaseIndex { void check(const FieldCollection &fields) const { const size_t fields_size = fields.size(); - const int current_fields_size = int(field_count()); + constexpr int current_fields_size = int(field_count()); const TypeDescriptor &first_field_type = fields[0].type(); const TypeDescriptor ¤t_first_field_type = this->field(0).type(); diff --git a/cpp/arcticdb/util/sparse_utils.hpp b/cpp/arcticdb/util/sparse_utils.hpp index 6eb892c1b3..5b17bcfe5a 100644 --- a/cpp/arcticdb/util/sparse_utils.hpp +++ b/cpp/arcticdb/util/sparse_utils.hpp @@ -75,19 +75,16 @@ template void default_initialize(uint8_t* data, size_t bytes) { using RawType = typename TagType::DataTypeTag::raw_type; const auto num_rows ARCTICDB_UNUSED = bytes / sizeof(RawType); - - constexpr auto data_type =TagType::DataTypeTag::data_type; + constexpr auto data_type = TagType::DataTypeTag::data_type; auto type_ptr ARCTICDB_UNUSED = reinterpret_cast(data); if constexpr (is_sequence_type(data_type)) { - std::fill(type_ptr, type_ptr + num_rows, not_a_string()); - } - else if constexpr (is_floating_point_type(data_type)) { - std::fill(type_ptr, type_ptr + num_rows, std::numeric_limits::quiet_NaN()); - } - else if constexpr (is_time_type(data_type)) { - std::fill(type_ptr, type_ptr + num_rows, NaT); + std::fill_n(type_ptr, num_rows, not_a_string()); + } else if constexpr (is_floating_point_type(data_type)) { + std::fill_n(type_ptr, num_rows, std::numeric_limits::quiet_NaN()); + } else if constexpr (is_time_type(data_type)) { + std::fill_n(type_ptr, num_rows, NaT); } else { - std::fill(data, data + bytes, 0); + std::fill_n(data, bytes, 0); } } diff --git a/cpp/arcticdb/util/type_handler.hpp b/cpp/arcticdb/util/type_handler.hpp index aa21f0e14f..8a3d167bcd 100644 --- a/cpp/arcticdb/util/type_handler.hpp +++ b/cpp/arcticdb/util/type_handler.hpp @@ -11,6 +11,7 @@ #include 
#include #include +#include #include @@ -32,28 +33,30 @@ struct ITypeHandler { const uint8_t*& source, uint8_t* dest, const VariantField& encoded_field_info, - const entity::TypeDescriptor& source_type_descriptor, - const entity::TypeDescriptor& destination_type_descriptor, size_t dest_bytes, std::shared_ptr buffers, - EncodingVersion encoding_version + EncodingVersion encoding_version, + const ColumnMapping& m ) { folly::poly_call<0>( *this, source, dest, encoded_field_info, - source_type_descriptor, - destination_type_descriptor, dest_bytes, buffers, - encoding_version + encoding_version, + m ); } + + int type_size() const { + return folly::poly_call<1>(*this); + } }; template - using Members = folly::PolyMembers<&T::handle_type>; + using Members = folly::PolyMembers<&T::handle_type, &T::type_size>; }; using TypeHandler = folly::Poly; diff --git a/python/tests/conftest.py b/python/tests/conftest.py index fd6d2b1e4a..f925ccd1d1 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -403,14 +403,15 @@ def lmdb_version_store_v2(version_store_factory, lib_name): return version_store_factory(dynamic_strings=True, encoding_version=int(EncodingVersion.V2), name=library_name) -@pytest.fixture -def lmdb_version_store(lmdb_version_store_v1, lmdb_version_store_v2, encoding_version): - if encoding_version == EncodingVersion.V1: - return lmdb_version_store_v1 - elif encoding_version == EncodingVersion.V2: - return lmdb_version_store_v2 - else: - raise ValueError(f"Unexpected encoding version: {encoding_version}") +@pytest.fixture( + scope="function", + params=( + "lmdb_version_store_v1", + "lmdb_version_store_v2", + ) +) +def lmdb_version_store(request): + yield request.getfixturevalue(request.param) @pytest.fixture @@ -657,6 +658,22 @@ def get_df(ts, width, max_col_width): return get_df +@pytest.fixture( + scope="function", + params=( + "lmdb_version_store_v1", + "lmdb_version_store_v2", + "lmdb_version_store_dynamic_schema_v1", + "lmdb_version_store_dynamic_schema_v2", + ), +) +def lmdb_version_store_static_and_dynamic(request): + """ + Designed to test all combinations between schema and encoding version for LMDB + """ + yield request.getfixturevalue(request.param) + + @pytest.fixture( scope="function", params=( diff --git a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py index bd9b03e45a..9cccbea962 100644 --- a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py +++ b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py @@ -22,7 +22,7 @@ def float_dtype(request): # object is for nullable boolean -@pytest.fixture(params=["bool", "object"]) +@pytest.fixture(params=["object", "bool"]) def boolean_dtype(request): yield request.param @@ -30,57 +30,201 @@ def boolean_dtype(request): class TestCanAppendToEmptyColumn: @pytest.fixture(autouse=True) - def create_empty_column(self, lmdb_version_store): - df_empty = pd.DataFrame({"col1": [None, None]}) - lmdb_version_store.write("sym", df_empty) + def create_empty_column(self, lmdb_version_store_static_and_dynamic): + lmdb_version_store_static_and_dynamic.write("sym", pd.DataFrame({"col1": [None, None]})) yield - def test_integer(self, lmdb_version_store, int_dtype): + def test_integer(self, lmdb_version_store_static_and_dynamic, int_dtype): df_non_empty = pd.DataFrame({"col1": np.array([1,2,3], dtype=int_dtype)}) - lmdb_version_store.append("sym", df_non_empty) + lmdb_version_store_static_and_dynamic.append("sym", 
df_non_empty) expected_result = pd.DataFrame({"col1": np.array([0,0,1,2,3], dtype=int_dtype)}) - assert_frame_equal(lmdb_version_store.read("sym").data, expected_result) - - def test_float(self, lmdb_version_store, float_dtype): + assert_frame_equal(lmdb_version_store_static_and_dynamic.read("sym").data, expected_result) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[0,2]).data, + pd.DataFrame({"col1": np.array([0,0], dtype=int_dtype)}) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[2,5]).data, + df_non_empty + ) + + def test_float(self, lmdb_version_store_static_and_dynamic, float_dtype): df_non_empty = pd.DataFrame({"col1": np.array([1,2,3], dtype=float_dtype)}) - lmdb_version_store.append("sym", df_non_empty) + lmdb_version_store_static_and_dynamic.append("sym", df_non_empty) expected_result = pd.DataFrame({"col1": np.array([float("NaN"),float("NaN"),1,2,3], dtype=float_dtype)}) - assert_frame_equal(lmdb_version_store.read("sym").data, expected_result) - - def test_bool(self, lmdb_version_store, boolean_dtype): + assert_frame_equal(lmdb_version_store_static_and_dynamic.read("sym").data, expected_result) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[0,2]).data, + pd.DataFrame({"col1": np.array([float("NaN"),float("NaN")], dtype=float_dtype)}) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[2,5]).data, + df_non_empty + ) + + def test_bool(self, lmdb_version_store_static_and_dynamic, boolean_dtype): df_non_empty = pd.DataFrame({"col1": np.array([True, False, True], dtype=boolean_dtype)}) - lmdb_version_store.append("sym", df_non_empty) + lmdb_version_store_static_and_dynamic.append("sym", df_non_empty) expected_result = pd.DataFrame({"col1": np.array([None, None, True, False, True], dtype=boolean_dtype)}) - assert_frame_equal(lmdb_version_store.read("sym").data, expected_result) - - def test_empty(self, lmdb_version_store): + assert_frame_equal(lmdb_version_store_static_and_dynamic.read("sym").data, expected_result) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[0,2]).data, + pd.DataFrame({"col1": np.array([None, None], dtype=boolean_dtype)}) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[2,5]).data, + df_non_empty + ) + + def test_empty(self, lmdb_version_store_static_and_dynamic): df_non_empty = pd.DataFrame({"col1": np.array([None, None, None])}) - lmdb_version_store.append("sym", df_non_empty) + lmdb_version_store_static_and_dynamic.append("sym", df_non_empty) expected_result = pd.DataFrame({"col1": np.array([None, None, None, None, None])}) - assert_frame_equal(lmdb_version_store.read("sym").data, expected_result) - - def test_string(self, lmdb_version_store): + assert_frame_equal(lmdb_version_store_static_and_dynamic.read("sym").data, expected_result) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[0,2]).data, + pd.DataFrame({"col1": np.array([None, None])}) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[2,5]).data, + df_non_empty + ) + + def test_string(self, lmdb_version_store_static_and_dynamic): df_non_empty = pd.DataFrame({"col1": np.array(["some_string", "long_string"*100])}) - lmdb_version_store.append("sym", df_non_empty) + lmdb_version_store_static_and_dynamic.append("sym", df_non_empty) expected_result = pd.DataFrame({"col1": np.array([None, None, 
"some_string", "long_string"*100])}) - assert_frame_equal(lmdb_version_store.read("sym").data, expected_result) - - def test_date(self, lmdb_version_store): - df_non_empty = pd.DataFrame({"col1": np.array([np.datetime64('2005-02'), np.datetime64('2005-03'), np.datetime64('2005-03')])}) - lmdb_version_store.append("sym", df_non_empty) + assert_frame_equal(lmdb_version_store_static_and_dynamic.read("sym").data, expected_result) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[0,2]).data, + pd.DataFrame({"col1": np.array([None, None])}) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[2,5]).data, + df_non_empty + ) + + def test_date(self, lmdb_version_store_static_and_dynamic): + df_non_empty = pd.DataFrame({"col1": np.array([np.datetime64('2005-02'), np.datetime64('2005-03'), np.datetime64('2005-03')])}, dtype="datetime64[ns]") + lmdb_version_store_static_and_dynamic.append("sym", df_non_empty) expected_result = pd.DataFrame({"col1": np.array([np.datetime64('NaT'), np.datetime64('NaT'), np.datetime64('2005-02'), np.datetime64('2005-03'), np.datetime64('2005-03')], dtype="datetime64[ns]")}) - assert_frame_equal(lmdb_version_store.read("sym").data, expected_result) + assert_frame_equal(lmdb_version_store_static_and_dynamic.read("sym").data, expected_result) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[0,2]).data, + pd.DataFrame({"col1": np.array([np.datetime64('NaT'), np.datetime64('NaT')], dtype="datetime64[ns]")}) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[2,5]).data, + df_non_empty + ) class TestCanAppendEmptyToColumn: - def test_integer(self, lmdb_version_store_v2): - int_dtype = "int32" - lmdb_version_store = lmdb_version_store_v2 - df = pd.DataFrame({"col1": np.array([1,2,3], dtype=int_dtype)}) - lmdb_version_store.write("sym", df) - lmdb_version_store.append("sym", pd.DataFrame({"col1": [None, None]})) - lmdb_version_store.append("sym", pd.DataFrame({"col1": [None, None]})) - expected_result = pd.DataFrame({"col1": np.array([1,2,3,0,0,0,0], dtype=int_dtype)}) - print(lmdb_version_store.read("sym", row_range=[5,7]).data) - assert_frame_equal(lmdb_version_store.read("sym").data, expected_result) \ No newline at end of file + def test_integer(self, lmdb_version_store_static_and_dynamic, int_dtype): + df_initial = pd.DataFrame({"col1": np.array([1,2,3], dtype=int_dtype)}) + lmdb_version_store_static_and_dynamic.write("sym", df_initial) + lmdb_version_store_static_and_dynamic.append("sym", pd.DataFrame({"col1": [None, None]})) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym").data, + pd.DataFrame({"col1": np.array([1,2,3,0,0], dtype=int_dtype)}) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[0,3]).data, + df_initial + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[3,5]).data, + pd.DataFrame({"col1": np.array([0,0], dtype=int_dtype)}) + ) + + def test_float(self, lmdb_version_store_static_and_dynamic, float_dtype): + df_initial = pd.DataFrame({"col1": np.array([1,2,3], dtype=float_dtype)}) + lmdb_version_store_static_and_dynamic.write("sym", df_initial) + lmdb_version_store_static_and_dynamic.append("sym", pd.DataFrame({"col1": [None, None]})) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym").data, + pd.DataFrame({"col1": np.array([1,2,3,float("NaN"),float("NaN")], dtype=float_dtype)}) + ) + 
assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[0,3]).data, + df_initial + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[3,5]).data, + pd.DataFrame({"col1": np.array([float("NaN"), float("NaN")], dtype=float_dtype)}) + ) + + def test_bool(self, lmdb_version_store_static_and_dynamic, boolean_dtype): + df_initial = pd.DataFrame({"col1": np.array([True, False, True], dtype=boolean_dtype)}) + df_with_none = pd.DataFrame({"col1": np.array([None, None])}) + lmdb_version_store_static_and_dynamic.write("sym", df_initial) + lmdb_version_store_static_and_dynamic.append("sym", df_with_none) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym").data, + pd.DataFrame({"col1": np.array([True, False, True, None, None], dtype=boolean_dtype)}) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[0,3]).data, + df_initial + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[3,5]).data, + pd.DataFrame(df_with_none, dtype=boolean_dtype) + ) + + def test_string(self, lmdb_version_store_static_and_dynamic): + df_initial = pd.DataFrame({"col1": np.array(["some_string", "long_string"*100, ""])}) + df_with_none = pd.DataFrame({"col1": np.array([None, None])}) + lmdb_version_store_static_and_dynamic.write("sym", df_initial) + lmdb_version_store_static_and_dynamic.append("sym", df_with_none) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym").data, + pd.DataFrame({"col1": np.array(["some_string", "long_string"*100, "", None, None])}) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[0,3]).data, + df_initial + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[3,5]).data, + df_with_none + ) + + def test_date(self, lmdb_version_store_static_and_dynamic): + df_initial = pd.DataFrame( + { + "col1": np.array( + [ + np.datetime64('2005-02'), + np.datetime64('2005-03'), + np.datetime64('2005-03') + ] + ) + }, + dtype="datetime64[ns]" + ) + lmdb_version_store_static_and_dynamic.write("sym", df_initial) + lmdb_version_store_static_and_dynamic.append("sym", pd.DataFrame({"col1": np.array([None, None])})) + expected_result = pd.DataFrame( + { + "col1": np.array( + [ + np.datetime64('2005-02'), + np.datetime64('2005-03'), + np.datetime64('2005-03'), + np.datetime64('NaT'), + np.datetime64('NaT') + ], + dtype="datetime64[ns]") + } + ) + assert_frame_equal(lmdb_version_store_static_and_dynamic.read("sym").data, expected_result) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[0,3]).data, + df_initial + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym", row_range=[3,5]).data, + pd.DataFrame({"col1": np.array([np.datetime64('NaT'), np.datetime64('NaT')], dtype="datetime64[ns]")}) + ) \ No newline at end of file From 840d8993cd2ee4eee353c5637c1472cb3f8c8466 Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Thu, 25 Jan 2024 10:38:27 +0200 Subject: [PATCH 05/30] Use boost span for flatten_and_fix_rows --- cpp/arcticdb/pipeline/write_frame.cpp | 20 +++++++++++--------- cpp/arcticdb/pipeline/write_frame.hpp | 10 ++++------ cpp/arcticdb/version/version_core.cpp | 14 ++++++-------- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/cpp/arcticdb/pipeline/write_frame.cpp b/cpp/arcticdb/pipeline/write_frame.cpp index a3dfe27197..b49caae4b7 100644 --- a/cpp/arcticdb/pipeline/write_frame.cpp +++ 
b/cpp/arcticdb/pipeline/write_frame.cpp @@ -28,8 +28,6 @@ #include #include -#include - namespace arcticdb::pipelines { using namespace arcticdb::entity; @@ -299,19 +297,23 @@ std::optional rewrite_partial_segment( } } -std::vector flatten_and_fix_rows(const std::vector>& groups, size_t& global_count) { +std::vector flatten_and_fix_rows(boost::span> groups, size_t& global_count) { std::vector output; global_count = 0; - for(auto group : groups) { - if(group.empty()) continue; + for (const std::vector& group : groups) { + if (group.empty()) + continue; auto group_start = group.begin()->slice_.row_range.first; - auto group_end = std::accumulate(std::begin(group), std::end(group), 0ULL, [](size_t a, const SliceAndKey& sk) { return std::max(a, sk.slice_.row_range.second); }); - std::transform(std::begin(group), std::end(group), std::back_inserter(output), [&] (auto& sk) { + auto group_end = std::accumulate(std::begin(group), std::end(group), 0ULL, [](size_t a, const SliceAndKey& sk) { + return std::max(a, sk.slice_.row_range.second); + }); + std::transform(std::begin(group), std::end(group), std::back_inserter(output), [&](SliceAndKey sk) { auto range_start = global_count + (sk.slice_.row_range.first - group_start); auto new_range = RowRange{range_start, range_start + (sk.slice_.row_range.diff())}; sk.slice_.row_range = new_range; - return sk; }); - global_count += (group_end - group_start) ; + return sk; + }); + global_count += (group_end - group_start); } return output; } diff --git a/cpp/arcticdb/pipeline/write_frame.hpp b/cpp/arcticdb/pipeline/write_frame.hpp index 386a8181d4..52e4b69b6b 100644 --- a/cpp/arcticdb/pipeline/write_frame.hpp +++ b/cpp/arcticdb/pipeline/write_frame.hpp @@ -7,12 +7,9 @@ #pragma once -#include -#include #include #include -#include #include #include #include @@ -22,6 +19,8 @@ #include #include +#include + namespace arcticdb::pipelines { using namespace arcticdb::stream; @@ -70,8 +69,7 @@ std::optional rewrite_partial_segment( bool before, const std::shared_ptr& store); -std::vector flatten_and_fix_rows( - const std::vector>& groups, - size_t& global_count); +// TODO: Use std::span when C++20 is enabled +std::vector flatten_and_fix_rows(boost::span> groups, size_t& global_count); } //namespace arcticdb::pipelines \ No newline at end of file diff --git a/cpp/arcticdb/version/version_core.cpp b/cpp/arcticdb/version/version_core.cpp index faddb9735f..fb304c9a30 100644 --- a/cpp/arcticdb/version/version_core.cpp +++ b/cpp/arcticdb/version/version_core.cpp @@ -264,13 +264,12 @@ VersionedItem delete_range_impl( auto orig_filter_range = std::holds_alternative(query.row_filter) ? 
get_query_index_range(index, index_range) : query.row_filter;

     size_t row_count = 0;
-    auto flattened_slice_and_keys = flatten_and_fix_rows(std::vector<std::vector<SliceAndKey>>{
+    const std::array<std::vector<SliceAndKey>, 4> groups{
         strictly_before(orig_filter_range, unaffected_keys),
         std::move(intersect_before),
         std::move(intersect_after),
-        strictly_after(orig_filter_range, unaffected_keys)},
-        row_count
-    );
+        strictly_after(orig_filter_range, unaffected_keys)};
+    auto flattened_slice_and_keys = flatten_and_fix_rows(boost::span{groups}, row_count);
     std::sort(std::begin(flattened_slice_and_keys), std::end(flattened_slice_and_keys));

     bool bucketize_dynamic = index_segment_reader.bucketize_dynamic();
@@ -367,14 +366,13 @@ VersionedItem update_impl(

     size_t row_count = 0;
     const size_t new_keys_size = new_slice_and_keys.size();
-    auto flattened_slice_and_keys = flatten_and_fix_rows(std::vector<std::vector<SliceAndKey>>{
+    const std::array<std::vector<SliceAndKey>, 5> groups{
         strictly_before(orig_filter_range, unaffected_keys),
         std::move(intersect_before),
         std::move(new_slice_and_keys),
         std::move(intersect_after),
-        strictly_after(orig_filter_range, unaffected_keys)},
-        row_count
-    );
+        strictly_after(orig_filter_range, unaffected_keys)};
+    auto flattened_slice_and_keys = flatten_and_fix_rows(boost::span{groups}, row_count);

     util::check(unaffected_keys.size() + new_keys_size + (affected_keys.size() * 2) >= flattened_slice_and_keys.size(),
         "Output size mismatch: {} + {} + (2 * {}) < {}",

From 1c97adfed36e98248838a0ecbf1b971a8ab6a5af Mon Sep 17 00:00:00 2001
From: Vasil Pashov
Date: Thu, 25 Jan 2024 11:34:04 +0200
Subject: [PATCH 06/30] Use TypeDescriptor constructor instead of static_cast

---
 cpp/arcticdb/entity/merge_descriptors.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/arcticdb/entity/merge_descriptors.cpp b/cpp/arcticdb/entity/merge_descriptors.cpp
index 0be36d5c74..ed8ea636cf 100644
--- a/cpp/arcticdb/entity/merge_descriptors.cpp
+++ b/cpp/arcticdb/entity/merge_descriptors.cpp
@@ -32,11 +32,11 @@ StreamDescriptor merge_descriptors(
       util::variant_match(temp_idx, [&merged_fields, &merged_fields_map] (const auto& idx) {
         using IndexType = std::decay_t<decltype(idx)>;
         merged_fields.emplace_back(idx.name());
-        merged_fields_map.try_emplace(idx.name(), static_cast<TypeDescriptor>(typename IndexType::TypeDescTag{}));
+        merged_fields_map.try_emplace(idx.name(), TypeDescriptor{typename IndexType::TypeDescTag{}});
       });
-    }
-    else
+    } else {
       util::raise_rte("Descriptor has uninitialized index and no default supplied");
+    }
   } else {
     index = index_type_from_descriptor(original);
   }

From 56385851e16ea0d0a7aa4bf6f0a056e82d76e8f7 Mon Sep 17 00:00:00 2001
From: Vasil Pashov
Date: Mon, 29 Jan 2024 18:38:50 +0200
Subject: [PATCH 07/30] Truncate columns on update and fix the pybool type
 handler so that a missing sparse map means all values are present

---
 cpp/arcticdb/column_store/memory_segment.hpp  |   5 +-
 .../column_store/memory_segment_impl.cpp      |  32 +++-
 .../column_store/memory_segment_impl.hpp      |  18 ++-
 cpp/arcticdb/pipeline/filter_segment.hpp      |   8 -
 cpp/arcticdb/pipeline/string_pool_utils.hpp   |  12 +-
 cpp/arcticdb/pipeline/write_frame.cpp         | 149 +++++++++---------
 cpp/arcticdb/pipeline/write_frame.hpp         |   7 +-
 cpp/arcticdb/processing/clause.cpp            |   2 +-
 cpp/arcticdb/processing/processing_unit.cpp   |   2 +-
 cpp/arcticdb/python/python_handlers.cpp       |  53 +++----
 cpp/arcticdb/version/version_core.cpp         |  20 ++-
 .../version_store/test_empty_column_type.py   |  19 ++-
 12 files changed, 193 insertions(+), 134 deletions(-)

diff --git a/cpp/arcticdb/column_store/memory_segment.hpp
b/cpp/arcticdb/column_store/memory_segment.hpp index 0e599955ec..0dc734ffc7 100644 --- a/cpp/arcticdb/column_store/memory_segment.hpp +++ b/cpp/arcticdb/column_store/memory_segment.hpp @@ -398,8 +398,9 @@ class SegmentInMemory { return SegmentInMemory(impl_->filter(filter_bitset, filter_down_stringpool, validate)); } - SegmentInMemory truncate(size_t start_row, size_t end_row) const{ - return SegmentInMemory(impl_->truncate(start_row, end_row)); + /// @see SegmentInMemoryImpl::truncate + SegmentInMemory truncate(size_t start_row, size_t end_row, bool reconstruct_string_pool) const{ + return SegmentInMemory(impl_->truncate(start_row, end_row, reconstruct_string_pool)); } std::vector partition(const std::vector>& row_to_segment, diff --git a/cpp/arcticdb/column_store/memory_segment_impl.cpp b/cpp/arcticdb/column_store/memory_segment_impl.cpp index efd3b17e4f..4be680acdc 100644 --- a/cpp/arcticdb/column_store/memory_segment_impl.cpp +++ b/cpp/arcticdb/column_store/memory_segment_impl.cpp @@ -418,8 +418,11 @@ bool operator==(const SegmentInMemoryImpl& left, const SegmentInMemoryImpl& righ return true; } -// Inclusive of start_row, exclusive of end_row -std::shared_ptr SegmentInMemoryImpl::truncate(size_t start_row, size_t end_row) const { +std::shared_ptr SegmentInMemoryImpl::truncate( + size_t start_row, + size_t end_row, + bool reconstruct_string_pool +) const { auto num_values = end_row - start_row; internal::check( is_sparse() || (start_row < row_count() && end_row <= row_count()), @@ -428,8 +431,10 @@ std::shared_ptr SegmentInMemoryImpl::truncate(size_t start_ auto output = std::make_shared(); output->set_row_data(num_values - 1); - output->set_string_pool(string_pool_); output->set_compacted(compacted_); + if (!reconstruct_string_pool) { + output->set_string_pool(string_pool_); + } if (metadata_) { google::protobuf::Any metadata; metadata.CopyFrom(*metadata_); @@ -437,8 +442,25 @@ std::shared_ptr SegmentInMemoryImpl::truncate(size_t start_ } for(const auto&& [idx, column] : folly::enumerate(columns_)) { - auto truncated_column = Column::truncate(column, start_row, end_row); - output->add_column(descriptor_->field(idx), truncated_column); + const DataType column_type = column->type().data_type(); + const Field& field = descriptor_->field(idx); + if (is_sequence_type(column_type) && reconstruct_string_pool) { + output->add_column(field, num_values, true); + ChunkedBuffer& target = output->column(idx).data().buffer(); + for (size_t row = 0u; row < num_values; ++row) { + util::variant_match( + get_string_from_buffer(row, column->data().buffer(), output->string_pool()), + [&](std::string_view sv) { + OffsetString off_str = output->string_pool().get(sv); + set_offset_string_at(row, target, off_str.offset()); + }, + [&](entity::position_t offset) { set_offset_string_at(row, target, offset); } + ); + } + } else { + auto truncated_column = Column::truncate(column, start_row, end_row); + output->add_column(field, truncated_column); + } } output->attach_descriptor(descriptor_); return output; diff --git a/cpp/arcticdb/column_store/memory_segment_impl.hpp b/cpp/arcticdb/column_store/memory_segment_impl.hpp index 71f07eb3b8..7e8cb743d8 100644 --- a/cpp/arcticdb/column_store/memory_segment_impl.hpp +++ b/cpp/arcticdb/column_store/memory_segment_impl.hpp @@ -782,8 +782,8 @@ class SegmentInMemoryImpl { SegmentInMemoryImpl clone() const; - void set_string_pool(const std::shared_ptr& string_pool) { - string_pool_ = string_pool; + void set_string_pool(std::shared_ptr string_pool) { + string_pool_ = 
std::move(string_pool);
     }

     std::shared_ptr<SegmentInMemoryImpl> get_output_segment(size_t num_values, bool pre_allocate=true) const;

@@ -808,8 +808,18 @@ class SegmentInMemoryImpl {

     void set_timeseries_descriptor(TimeseriesDescriptor&& tsd);

-    // Inclusive of start_row, exclusive of end_row
-    std::shared_ptr<SegmentInMemoryImpl> truncate(size_t start_row, size_t end_row) const;
+    /// @brief Construct a copy of the segment containing only rows in [start_row; end_row)
+    /// @param start_row Start of the row range (inclusive)
+    /// @param end_row End of the row range (exclusive)
+    /// @param reconstruct_string_pool When truncating, some of the string values of the original
+    /// segment might not be referenced in the resulting segment. In this case, reconstructing the
+    /// string pool will save some memory. Note that reconstructing the string pool is an expensive
+    /// operation and should be avoided if possible.
+    std::shared_ptr<SegmentInMemoryImpl> truncate(
+        size_t start_row,
+        size_t end_row,
+        bool reconstruct_string_pool
+    ) const;

     // Partitions the segment into n new segments. Each row in the starting segment is mapped to one of the output segments
     // by the row_to_segment vector (std::nullopt means the row is not included in any output segment).
diff --git a/cpp/arcticdb/pipeline/filter_segment.hpp b/cpp/arcticdb/pipeline/filter_segment.hpp
index 1e6699a3b7..142d132bd0 100644
--- a/cpp/arcticdb/pipeline/filter_segment.hpp
+++ b/cpp/arcticdb/pipeline/filter_segment.hpp
@@ -9,8 +9,6 @@
 #include
 #include
-#include
-#include

 namespace arcticdb {

@@ -21,12 +19,6 @@ inline SegmentInMemory filter_segment(const SegmentInMemory& input,
     return input.filter(filter_bitset, filter_down_stringpool, validate);
 }

-inline SegmentInMemory truncate_segment(const SegmentInMemory& input,
-                                        size_t start,
-                                        size_t end) {
-    return input.truncate(start, end);
-}
-
 inline std::vector<SegmentInMemory> partition_segment(const SegmentInMemory& input,
         const std::vector<std::optional<uint8_t>>& row_to_segment,
         const std::vector<uint64_t>& segment_counts) {
diff --git a/cpp/arcticdb/pipeline/string_pool_utils.hpp b/cpp/arcticdb/pipeline/string_pool_utils.hpp
index 3a1212bc18..fac34f7894 100644
--- a/cpp/arcticdb/pipeline/string_pool_utils.hpp
+++ b/cpp/arcticdb/pipeline/string_pool_utils.hpp
@@ -36,8 +36,16 @@ inline auto get_string_from_pool(entity::position_t offset_val, const StringPool
     return string_pool.get_const_view(offset_val);
 }

-inline std::variant<std::string_view, entity::position_t> get_string_from_buffer(size_t offset, const ChunkedBuffer& src, const StringPool& string_pool) {
-    auto offset_val = get_offset_string_at(offset, src);
+/// @brief Get the i-th string from a buffer
+/// @param string_pos The index of the string to be returned
+/// @return If the string at @p string_pos is an actual string inside @p string_pool return a
+/// string view, otherwise return an (integer) placeholder representing not a string.
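+///
+/// A minimal usage sketch (hypothetical `column_buffer`/`pool` variables, for illustration only):
+///     util::variant_match(
+///         get_string_from_buffer(row, column_buffer, pool),
+///         [](std::string_view sv) { /* a real string view into the pool */ },
+///         [](entity::position_t placeholder) { /* not_a_string() or nan_placeholder() */ });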
+inline std::variant get_string_from_buffer( + size_t string_pos, + const ChunkedBuffer& src, + const StringPool& string_pool +) { + auto offset_val = get_offset_string_at(string_pos, src); if (offset_val == nan_placeholder() || offset_val == not_a_string()) return offset_val; else diff --git a/cpp/arcticdb/pipeline/write_frame.cpp b/cpp/arcticdb/pipeline/write_frame.cpp index b49caae4b7..1afc384227 100644 --- a/cpp/arcticdb/pipeline/write_frame.cpp +++ b/cpp/arcticdb/pipeline/write_frame.cpp @@ -215,86 +215,87 @@ folly::Future append_frame( }); } -void update_string_columns(const SegmentInMemory& original, SegmentInMemory output) { - util::check(original.descriptor() == output.descriptor(), "Update string column handling expects identical descriptors"); - for (size_t column = 0; column < static_cast(original.descriptor().fields().size()); ++column) { - auto &frame_field = original.field(column); - auto field_type = frame_field.type().data_type(); - - if (is_sequence_type(field_type)) { - auto &target = output.column(static_cast(column)).data().buffer(); - size_t end = output.row_count(); - for(auto row = 0u; row < end; ++row) { - auto val = get_string_from_buffer(row, target, original.const_string_pool()); - util::variant_match(val, - [&] (std::string_view sv) { - auto off_str = output.string_pool().get(sv); - set_offset_string_at(row, target, off_str.offset()); - }, - [&] (entity::position_t offset) { - set_offset_string_at(row, target, offset); - }); +/// @brief Find the row range of affected rows during a partial rewrite (on update) +/// During partial rewrite the segment is either affected from the beginning to a +/// certain row or from a certain row to the end. Thus the row range will always be +/// either [0, row) or [row, end). +static RowRange partial_rewrite_row_range( + const SegmentInMemory& segment, + const IndexRange& range, + AffectedSegmentEnd affected_end +) { + if (affected_end == AffectedSegmentEnd::START) { + const timestamp start = std::get(range.start_); + auto bound = std::lower_bound(std::begin(segment), std::end(segment), start, [](const auto& row, timestamp t) { + return row.template index() < t; + }); + return {0, bound->row_pos()}; + } else { + const timestamp end = std::get(range.end_); + auto bound = std::upper_bound(std::begin(segment), std::end(segment), end, [](timestamp t, auto& row) { + return t < row.template index(); + }); + return {bound->row_pos(), segment.row_count()}; + } +} - } - } +/// @brief Find the index range of affected rows during a partial rewrite (on update) +/// Similar to partial_rewrite_row_range the segment is affected either at the beginning +/// or at the end. 
+static IndexRange partial_rewrite_index_range(
+    const IndexRange& segment_range,
+    const IndexRange& update_range,
+    AffectedSegmentEnd affected_end
+) {
+    if (affected_end == AffectedSegmentEnd::START) {
+        util::check(
+            segment_range.start_ < update_range.start_,
+            "Unexpected index range in after: {} !< {}",
+            segment_range.start_,
+            update_range.start_
+        );
+        return {segment_range.start_, update_range.start_};
+    } else {
+        util::check(
+            segment_range.end_ > update_range.end_,
+            "Unexpected non-intersection of update indices: {} !> {}",
+            segment_range.end_,
+            update_range.end_
+        );
+        return {update_range.end_, segment_range.end_};
+    }
+}

 std::optional<SliceAndKey> rewrite_partial_segment(
-    const SliceAndKey& existing,
-    IndexRange index_range,
-    VersionId version_id,
-    bool before,
-    const std::shared_ptr<Store>& store) {
-    const auto& key = existing.key();
-    const auto& existing_range =key.index_range();
+    const SliceAndKey& existing,
+    IndexRange index_range,
+    VersionId version_id,
+    AffectedSegmentEnd affected_end,
+    const std::shared_ptr<Store>& store
+) {
+    const auto& key = existing.key();
+    const auto& existing_range = key.index_range();
     auto kv = store->read(key).get();
-    auto &segment = kv.second;
-
-    auto start = std::get<timestamp>(index_range.start_);
-    auto end = std::get<timestamp>(index_range.end_);
-
-    if(!before) {
-        util::check(existing_range.start_ < index_range.start_, "Unexpected index range in after: {} !< {}", existing_range.start_, index_range.start_);
-        auto bound = std::lower_bound(std::begin(segment), std::end(segment), start, [] ( auto& row, timestamp t) {return row.template index<TimeseriesIndex>() < t; });
-        size_t num_rows = std::distance(std::begin(segment), bound);
-        if(num_rows == 0)
-            return std::nullopt;
-
-        auto output = SegmentInMemory{segment.descriptor(), num_rows};
-        std::copy(std::begin(segment), bound, std::back_inserter(output));
-        update_string_columns(segment, output);
-        FrameSlice new_slice{
-            std::make_shared<StreamDescriptor>(output.descriptor()),
-            existing.slice_.col_range,
-            RowRange{0, num_rows},
-            existing.slice_.hash_bucket(),
-            existing.slice_.num_buckets()};
-        auto fut_key = store->write(key.type(), version_id, key.id(), existing_range.start_, index_range.start_, std::move(output));
-        return SliceAndKey{std::move(new_slice), std::get<AtomKey>(std::move(fut_key).get())};
-    }
-    else {
-        util::check(existing_range.end_ > index_range.end_, "Unexpected non-intersection of update indices: {} !> {}", existing_range.end_ , index_range.end_);
-
-        auto bound = std::upper_bound(std::begin(segment), std::end(segment), end, [] ( timestamp t, auto& row) {
-            return t < row.template index<TimeseriesIndex>();
-        });
-        size_t num_rows = std::distance(bound, std::end(segment));
-        if(num_rows == 0)
-            return std::nullopt;
-
-        auto output = SegmentInMemory{segment.descriptor(), num_rows};
-        std::copy(bound, std::end(segment), std::back_inserter(output));
-        update_string_columns(segment, output);
-        FrameSlice new_slice{
-            std::make_shared<StreamDescriptor>(output.descriptor()),
-            existing.slice_.col_range,
-            RowRange{0, num_rows},
-            existing.slice_.hash_bucket(),
-            existing.slice_.num_buckets()};
-        auto fut_key = store->write(key.type(), version_id, key.id(), index_range.end_, existing_range.end_, std::move(output));
-        return SliceAndKey{std::move(new_slice), std::get<AtomKey>(std::move(fut_key).get())};
-    }
+    auto& segment = kv.second;
+    const IndexRange affected_index_range = partial_rewrite_index_range(existing_range, index_range, affected_end);
+    const RowRange affected_row_range = partial_rewrite_row_range(segment, index_range, affected_end);
+    const size_t num_rows =
affected_row_range.end() - affected_row_range.start(); + SegmentInMemory output = segment.truncate(affected_row_range.start(), affected_row_range.end(), true); + const FrameSlice new_slice{ + std::make_shared(output.descriptor()), + existing.slice_.col_range, + RowRange{0, num_rows}, + existing.slice_.hash_bucket(), + existing.slice_.num_buckets()}; + auto fut_key = store->write( + key.type(), + version_id, + key.id(), + affected_index_range.start_, + affected_index_range.end_, + std::move(output) + ); + return SliceAndKey{std::move(new_slice), std::get(std::move(fut_key).get())}; } std::vector flatten_and_fix_rows(boost::span> groups, size_t& global_count) { diff --git a/cpp/arcticdb/pipeline/write_frame.hpp b/cpp/arcticdb/pipeline/write_frame.hpp index 52e4b69b6b..bfd5169108 100644 --- a/cpp/arcticdb/pipeline/write_frame.hpp +++ b/cpp/arcticdb/pipeline/write_frame.hpp @@ -62,11 +62,16 @@ folly::Future append_frame( bool ignore_sort_order ); +enum class AffectedSegmentEnd { + START, + END +}; + std::optional rewrite_partial_segment( const SliceAndKey& existing, IndexRange index_range, VersionId version_id, - bool before, + AffectedSegmentEnd affected_end, const std::shared_ptr& store); // TODO: Use std::span when C++20 is enabled diff --git a/cpp/arcticdb/processing/clause.cpp b/cpp/arcticdb/processing/clause.cpp index 80899e2191..d78574ad36 100644 --- a/cpp/arcticdb/processing/clause.cpp +++ b/cpp/arcticdb/processing/clause.cpp @@ -799,7 +799,7 @@ Composite RowRangeClause::process(Composite &&entity_ids) if (end_ > row_range->start() && end_ < row_range->end()) { end_row = end_ - (row_range->start()); } - auto truncated_segment = truncate_segment(*proc.segments_->at(idx), start_row, end_row); + auto truncated_segment = proc.segments_->at(idx)->truncate(start_row, end_row, false); auto num_rows = truncated_segment.is_null() ? 0 : truncated_segment.row_count(); proc.row_ranges_->at(idx) = std::make_shared(proc.row_ranges_->at(idx)->first, proc.row_ranges_->at(idx)->first + num_rows); auto num_cols = truncated_segment.is_null() ? 0 : truncated_segment.descriptor().field_count() - truncated_segment.descriptor().index().field_count(); diff --git a/cpp/arcticdb/processing/processing_unit.cpp b/cpp/arcticdb/processing/processing_unit.cpp index 53346dfcaf..f0f98c4eed 100644 --- a/cpp/arcticdb/processing/processing_unit.cpp +++ b/cpp/arcticdb/processing/processing_unit.cpp @@ -34,7 +34,7 @@ void ProcessingUnit::truncate(size_t start_row, size_t end_row) { "ProcessingUnit::truncate requires all of segments, row_ranges, and col_ranges to be present"); for (auto&& [idx, segment]: folly::enumerate(*segments_)) { - auto seg = truncate_segment(*segment, start_row, end_row); + auto seg = segment->truncate(start_row, end_row, false); auto num_rows = seg.is_null() ? 0 : seg.row_count(); row_ranges_->at(idx) = std::make_shared(row_ranges_->at(idx)->first, row_ranges_->at(idx)->first + num_rows); auto num_cols = seg.is_null() ? 0 : seg.descriptor().field_count() - seg.descriptor().index().field_count(); diff --git a/cpp/arcticdb/python/python_handlers.cpp b/cpp/arcticdb/python/python_handlers.cpp index 2f8d343c3a..675a8be1c2 100644 --- a/cpp/arcticdb/python/python_handlers.cpp +++ b/cpp/arcticdb/python/python_handlers.cpp @@ -5,7 +5,6 @@ * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
*/ #include -#include #include #include #include @@ -49,7 +48,7 @@ namespace arcticdb { static inline const PyObject** fill_with_none(const PyObject** dest, size_t count) { auto none = py::none(); - std::generate(dest, dest + count, [&none]() { return none.inc_ref().ptr(); }); + std::generate_n(dest, count, [&none]() { return none.inc_ref().ptr(); }); return dest + count; } @@ -85,9 +84,6 @@ namespace arcticdb { using RawType = typename decltype(tag)::DataTypeTag::raw_type; RawType* ptr_dest = reinterpret_cast(dest); if constexpr (is_sequence_type(tag.data_type())) { - // not_a_string() is set here as strings are offset in the string pool - // later in fix_and_reduce() the offsets are replaced by a real python strings - // TODO: It would be best if the type handler sets this to py::none std::fill_n(ptr_dest, m.num_rows_, arcticdb::not_a_string()); } else if constexpr (is_nullable_type(TypeDescriptor(tag))) { fill_with_none(reinterpret_cast(dest), m.num_rows_); @@ -140,35 +136,30 @@ namespace arcticdb { util::check(field->has_ndarray(), "Bool handler expected array"); ARCTICDB_DEBUG(log::version(), "Bool handler got encoded field: {}", field->DebugString()); auto ptr_dest = reinterpret_cast(dest); - - if(!field->ndarray().sparse_map_bytes()) { - ARCTICDB_DEBUG(log::version(), "Bool handler has no values"); - fill_with_none(ptr_dest, m.num_rows_); - return; - } - - std::optional bv; const auto& ndarray = field->ndarray(); const auto bytes = encoding_sizes::data_uncompressed_size(ndarray); - auto sparse = ChunkedBuffer::presized(bytes); - SliceDataSink sparse_sink{sparse.data(), bytes}; - data += decode_field(m.source_type_desc_, *field, data, sparse_sink, bv, encding_version); - const auto num_bools = bv->count(); - auto ptr_src = sparse.template ptr_cast(0, num_bools * sizeof(uint8_t)); - auto last_row = 0u; - const auto ptr_begin = ptr_dest; - - for(auto en = bv->first(); en < bv->end(); ++en) { - const auto current_pos = *en; - ptr_dest = fill_with_none(ptr_dest, current_pos - last_row); - last_row = current_pos; - auto py_bool = py::bool_(static_cast(*ptr_src++)); - *ptr_dest++ = py_bool.release().ptr(); - ++last_row; - util::check(ptr_begin + last_row == ptr_dest, "Boolean pointer out of alignment {} != {}", last_row, uintptr_t(ptr_begin + last_row)); + ChunkedBuffer decoded_data = ChunkedBuffer::presized(bytes); + SliceDataSink decoded_data_sink{decoded_data.data(), bytes}; + std::optional sparse_map; + data += decode_field(m.source_type_desc_, *field, data, decoded_data_sink, sparse_map, encding_version); + const auto num_bools = sparse_map.has_value() ? sparse_map->count() : m.num_rows_; + auto ptr_src = decoded_data.template ptr_cast(0, num_bools * sizeof(uint8_t)); + if (sparse_map.has_value()) { + ARCTICDB_TRACE(log::codec(), "Bool handler using a sparse map"); + unsigned last_row = 0u; + for (auto en = sparse_map->first(); en < sparse_map->end(); ++en, last_row++) { + const auto current_pos = *en; + ptr_dest = fill_with_none(ptr_dest, current_pos - last_row); + last_row = current_pos; + *ptr_dest++ = py::bool_(static_cast(*ptr_src++)).release().ptr(); + } + fill_with_none(ptr_dest, m.num_rows_ - last_row); + } else { + ARCTICDB_TRACE(log::codec(), "Bool handler didn't find a sparse map. 
Assuming dense array.");
+                std::transform(ptr_src, ptr_src + num_bools, ptr_dest, [](uint8_t value) {
+                    return py::bool_(static_cast<bool>(value)).release().ptr();
+                });
             }
-            fill_with_none(ptr_dest, m.num_rows_ - last_row);
-            util::check(ptr_begin + last_row == ptr_dest, "Boolean pointer out of alignment at end: {} != {}", last_row, uintptr_t(ptr_begin + last_row));
         }, encoded_field_info);
     }
diff --git a/cpp/arcticdb/version/version_core.cpp b/cpp/arcticdb/version/version_core.cpp
index fb304c9a30..328d8429c3 100644
--- a/cpp/arcticdb/version/version_core.cpp
+++ b/cpp/arcticdb/version/version_core.cpp
@@ -210,16 +210,28 @@ inline std::pair<std::vector<SliceAndKey>, std::vector<SliceAndKey>> intersectin
     for (const auto& affected_slice_and_key : affected_keys) {
         const auto& affected_range = affected_slice_and_key.key().index_range();
-        if (intersects(affected_range, front_range) && !overlaps(affected_range, front_range)
-            && is_before(affected_range, front_range)) {
-            auto front_overlap_key = rewrite_partial_segment(affected_slice_and_key, front_range, version_id, false, store);
+        if (intersects(affected_range, front_range) && !overlaps(affected_range, front_range) &&
+            is_before(affected_range, front_range)) {
+            auto front_overlap_key = rewrite_partial_segment(
+                affected_slice_and_key,
+                front_range,
+                version_id,
+                AffectedSegmentEnd::START,
+                store
+            );
             if (front_overlap_key)
                 intersect_before.push_back(*front_overlap_key);
         }
         if (intersects(affected_range, back_range) && !overlaps(affected_range, back_range) &&
             is_after(affected_range, back_range)) {
-            auto back_overlap_key = rewrite_partial_segment(affected_slice_and_key, back_range, version_id, true, store);
+            auto back_overlap_key = rewrite_partial_segment(
+                affected_slice_and_key,
+                back_range,
+                version_id,
+                AffectedSegmentEnd::END,
+                store
+            );
             if (back_overlap_key)
                 intersect_after.push_back(*back_overlap_key);
         }
diff --git a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py
index 9cccbea962..5d881d39fe 100644
--- a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py
+++ b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py
@@ -227,4 +227,21 @@ def test_date(self, lmdb_version_store_static_and_dynamic):
         assert_frame_equal(
             lmdb_version_store_static_and_dynamic.read("sym", row_range=[3,5]).data,
             pd.DataFrame({"col1": np.array([np.datetime64('NaT'), np.datetime64('NaT')], dtype="datetime64[ns]")})
-        )
\ No newline at end of file
+        )
+
+
+class TestCanUpdateWithEmpty:
+    def test_bool(self, lmdb_version_store_static_and_dynamic, boolean_dtype):
+        index = list(pd.date_range(start="1/1/2024", end="1/4/2024"))
+        lmdb_version_store_static_and_dynamic.write(
+            "sym",
+            pd.DataFrame({"col": [True, True, True, True]}, dtype=boolean_dtype, index=index)
+        )
+        lmdb_version_store_static_and_dynamic.update(
+            "sym",
+            pd.DataFrame({"col": [None, None]}, dtype=boolean_dtype, index=list(pd.date_range(start="1/2/2024", end="1/3/2024")))
+        )
+        assert_frame_equal(
+            lmdb_version_store_static_and_dynamic.read("sym").data,
+            pd.DataFrame({"col": [True, None, None, True]}, index=index, dtype=boolean_dtype)
+        )

From 0efb037d305df42ed8202135b86e15fee9690e72 Mon Sep 17 00:00:00 2001
From: Vasil Pashov
Date: Tue, 30 Jan 2024 16:21:34 +0200
Subject: [PATCH 08/30] Complete testing for updating with empty type column

---
 cpp/arcticdb/CMakeLists.txt                  |   1 -
 cpp/arcticdb/column_store/column.cpp         |   2 -
 cpp/arcticdb/column_store/column.hpp         |   4 +-
 .../column_store/memory_segment_impl.cpp     |  19 ++-
 cpp/arcticdb/pipeline/write_frame.cpp        |  21 +--
 cpp/arcticdb/pipeline/write_frame.hpp        |   4 +-
 cpp/arcticdb/version/version_core.cpp        |  11 +-
 .../version_store/test_empty_column_type.py  | 137 ++++++++++++++++--
 8 files changed, 154 insertions(+), 45 deletions(-)

diff --git a/cpp/arcticdb/CMakeLists.txt b/cpp/arcticdb/CMakeLists.txt
index 71e347ae62..ea1d9dfdc8 100644
--- a/cpp/arcticdb/CMakeLists.txt
+++ b/cpp/arcticdb/CMakeLists.txt
@@ -230,7 +230,6 @@ set(arcticdb_srcs
         pipeline/read_pipeline.hpp
         pipeline/slicing.hpp
         pipeline/string_pool_utils.hpp
-        pipeline/string_pool_utils.hpp
        pipeline/value.hpp
        pipeline/value_set.hpp
        pipeline/write_frame.hpp
diff --git a/cpp/arcticdb/column_store/column.cpp b/cpp/arcticdb/column_store/column.cpp
index 4d62a6b4d7..3be9d17f0c 100644
--- a/cpp/arcticdb/column_store/column.cpp
+++ b/cpp/arcticdb/column_store/column.cpp
@@ -512,8 +512,6 @@ std::vector<std::shared_ptr<Column>> Column::split(const std::shared_ptr
     return output;
 }

-// Produces a new column
-// Inclusive of start_row, exclusive of end_row
 std::shared_ptr<Column> Column::truncate(const std::shared_ptr<Column>& column, size_t start_row, size_t end_row) {
     auto type_size = get_type_size(column->type().data_type());
     // Bytes from the underlying chunked buffer to include. Inclusive of start_byte, exclusive of end_byte
diff --git a/cpp/arcticdb/column_store/column.hpp b/cpp/arcticdb/column_store/column.hpp
index b8f69d1599..9ca1d62712 100644
--- a/cpp/arcticdb/column_store/column.hpp
+++ b/cpp/arcticdb/column_store/column.hpp
@@ -647,7 +647,9 @@ class Column {
     }

     static std::vector<std::shared_ptr<Column>> split(const std::shared_ptr<Column>& column, size_t num_rows);
-
+    /// @brief Produces a new column containing only the data in range [start_row, end_row)
+    /// @param[in] start_row Inclusive start of the row range
+    /// @param[in] end_row Exclusive end of the row range
     static std::shared_ptr<Column> truncate(const std::shared_ptr<Column>& column, size_t start_row, size_t end_row);

 private:
diff --git a/cpp/arcticdb/column_store/memory_segment_impl.cpp b/cpp/arcticdb/column_store/memory_segment_impl.cpp
index 4be680acdc..f483b7b4a9 100644
--- a/cpp/arcticdb/column_store/memory_segment_impl.cpp
+++ b/cpp/arcticdb/column_store/memory_segment_impl.cpp
@@ -446,16 +446,15 @@ std::shared_ptr<SegmentInMemoryImpl> SegmentInMemoryImpl::truncate(
         const Field& field = descriptor_->field(idx);
         if (is_sequence_type(column_type) && reconstruct_string_pool) {
             output->add_column(field, num_values, true);
+            const ChunkedBuffer& source = column->data().buffer();
             ChunkedBuffer& target = output->column(idx).data().buffer();
-            for (size_t row = 0u; row < num_values; ++row) {
-                util::variant_match(
-                    get_string_from_buffer(row, column->data().buffer(), output->string_pool()),
-                    [&](std::string_view sv) {
-                        OffsetString off_str = output->string_pool().get(sv);
-                        set_offset_string_at(row, target, off_str.offset());
-                    },
-                    [&](entity::position_t offset) { set_offset_string_at(row, target, offset); }
+            for (size_t row = 0; row < num_values; ++row) {
+                const StringPool::offset_t offset = util::variant_match(
+                    get_string_from_buffer(start_row + row, source, *string_pool_),
+                    [&](std::string_view sv) { return output->string_pool().get(sv).offset(); },
+                    [](StringPool::offset_t offset) { return offset; }
                 );
+                set_offset_string_at(row, target, offset);
             }
         } else {
             auto truncated_column = Column::truncate(column, start_row, end_row);
@@ -466,8 +465,8 @@ std::shared_ptr<SegmentInMemoryImpl> SegmentInMemoryImpl::truncate(
     return output;
 }

-// Combine 2 segments that hold different columns associated with the same rows
-// If unique_column_names is true, any
columns from other with names matching those in this are ignored +/// @brief Combine 2 segments that hold different columns associated with the same rows +/// @param[in] unique_column_names If true, any columns from other with names matching those in this are ignored void SegmentInMemoryImpl::concatenate(SegmentInMemoryImpl&& other, bool unique_column_names) { internal::check( row_count() == other.row_count(), diff --git a/cpp/arcticdb/pipeline/write_frame.cpp b/cpp/arcticdb/pipeline/write_frame.cpp index 1afc384227..8f895ee07a 100644 --- a/cpp/arcticdb/pipeline/write_frame.cpp +++ b/cpp/arcticdb/pipeline/write_frame.cpp @@ -222,9 +222,9 @@ folly::Future append_frame( static RowRange partial_rewrite_row_range( const SegmentInMemory& segment, const IndexRange& range, - AffectedSegmentEnd affected_end + AffectedSegmentPart affected_end ) { - if (affected_end == AffectedSegmentEnd::START) { + if (affected_end == AffectedSegmentPart::START) { const timestamp start = std::get(range.start_); auto bound = std::lower_bound(std::begin(segment), std::end(segment), start, [](const auto& row, timestamp t) { return row.template index() < t; @@ -245,9 +245,9 @@ static RowRange partial_rewrite_row_range( static IndexRange partial_rewrite_index_range( const IndexRange& segment_range, const IndexRange& update_range, - AffectedSegmentEnd affected_end + AffectedSegmentPart affected_part ) { - if (affected_end == AffectedSegmentEnd::START) { + if (affected_part == AffectedSegmentPart::START) { util::check( segment_range.start_ < update_range.start_, "Unexpected index range in after: {} !< {}", @@ -270,18 +270,18 @@ std::optional rewrite_partial_segment( const SliceAndKey& existing, IndexRange index_range, VersionId version_id, - AffectedSegmentEnd affected_end, + AffectedSegmentPart affected_part, const std::shared_ptr& store ) { const auto& key = existing.key(); - const auto& existing_range = key.index_range(); + const IndexRange& existing_range = key.index_range(); auto kv = store->read(key).get(); - auto& segment = kv.second; - const IndexRange affected_index_range = partial_rewrite_index_range(existing_range, index_range, affected_end); - const RowRange affected_row_range = partial_rewrite_row_range(segment, index_range, affected_end); + const SegmentInMemory& segment = kv.second; + const IndexRange affected_index_range = partial_rewrite_index_range(existing_range, index_range, affected_part); + const RowRange affected_row_range = partial_rewrite_row_range(segment, index_range, affected_part); const size_t num_rows = affected_row_range.end() - affected_row_range.start(); SegmentInMemory output = segment.truncate(affected_row_range.start(), affected_row_range.end(), true); - const FrameSlice new_slice{ + FrameSlice new_slice{ std::make_shared(output.descriptor()), existing.slice_.col_range, RowRange{0, num_rows}, @@ -300,6 +300,7 @@ std::optional rewrite_partial_segment( std::vector flatten_and_fix_rows(boost::span> groups, size_t& global_count) { std::vector output; + output.reserve(groups.size()); global_count = 0; for (const std::vector& group : groups) { if (group.empty()) diff --git a/cpp/arcticdb/pipeline/write_frame.hpp b/cpp/arcticdb/pipeline/write_frame.hpp index bfd5169108..28559823eb 100644 --- a/cpp/arcticdb/pipeline/write_frame.hpp +++ b/cpp/arcticdb/pipeline/write_frame.hpp @@ -62,7 +62,7 @@ folly::Future append_frame( bool ignore_sort_order ); -enum class AffectedSegmentEnd { +enum class AffectedSegmentPart { START, END }; @@ -71,7 +71,7 @@ std::optional rewrite_partial_segment( const 
SliceAndKey& existing, IndexRange index_range, VersionId version_id, - AffectedSegmentEnd affected_end, + AffectedSegmentPart affected_part, const std::shared_ptr& store); // TODO: Use std::span when C++20 is enabled diff --git a/cpp/arcticdb/version/version_core.cpp b/cpp/arcticdb/version/version_core.cpp index 328d8429c3..74f258dfe5 100644 --- a/cpp/arcticdb/version/version_core.cpp +++ b/cpp/arcticdb/version/version_core.cpp @@ -204,7 +204,8 @@ inline std::pair, std::vector> intersectin const IndexRange& front_range, const IndexRange& back_range, VersionId version_id, - const std::shared_ptr& store) { + const std::shared_ptr& store +) { std::vector intersect_before; std::vector intersect_after; @@ -216,20 +217,20 @@ inline std::pair, std::vector> intersectin affected_slice_and_key, front_range, version_id, - AffectedSegmentEnd::START, + AffectedSegmentPart::START, store ); if (front_overlap_key) intersect_before.push_back(*front_overlap_key); } - if (intersects(affected_range, back_range) && !overlaps(affected_range, back_range) - && is_after(affected_range, back_range)) { + if (intersects(affected_range, back_range) && !overlaps(affected_range, back_range) && + is_after(affected_range, back_range)) { auto back_overlap_key = rewrite_partial_segment( affected_slice_and_key, back_range, version_id, - AffectedSegmentEnd::END, + AffectedSegmentPart::END, store ); if (back_overlap_key) diff --git a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py index 5d881d39fe..290ef1cc2d 100644 --- a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py +++ b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py @@ -27,8 +27,12 @@ def boolean_dtype(request): yield request.param -class TestCanAppendToEmptyColumn: +@pytest.fixture(params=["datetime64[ns]"]) +def date_dtype(request): + yield request.param + +class TestCanAppendToEmptyColumn: @pytest.fixture(autouse=True) def create_empty_column(self, lmdb_version_store_static_and_dynamic): lmdb_version_store_static_and_dynamic.write("sym", pd.DataFrame({"col1": [None, None]})) @@ -104,14 +108,38 @@ def test_string(self, lmdb_version_store_static_and_dynamic): df_non_empty ) - def test_date(self, lmdb_version_store_static_and_dynamic): - df_non_empty = pd.DataFrame({"col1": np.array([np.datetime64('2005-02'), np.datetime64('2005-03'), np.datetime64('2005-03')])}, dtype="datetime64[ns]") + def test_date(self, lmdb_version_store_static_and_dynamic, date_dtype): + df_non_empty = pd.DataFrame( + { + "col1": np.array( + [ + np.datetime64('2005-02'), + np.datetime64('2005-03'), + np.datetime64('2005-03') + ] + ) + }, + dtype=date_dtype + ) lmdb_version_store_static_and_dynamic.append("sym", df_non_empty) - expected_result = pd.DataFrame({"col1": np.array([np.datetime64('NaT'), np.datetime64('NaT'), np.datetime64('2005-02'), np.datetime64('2005-03'), np.datetime64('2005-03')], dtype="datetime64[ns]")}) + expected_result = pd.DataFrame( + { + "col1": np.array( + [ + np.datetime64('NaT'), + np.datetime64('NaT'), + np.datetime64('2005-02'), + np.datetime64('2005-03'), + np.datetime64('2005-03') + ], + dtype=date_dtype + ) + } + ) assert_frame_equal(lmdb_version_store_static_and_dynamic.read("sym").data, expected_result) assert_frame_equal( lmdb_version_store_static_and_dynamic.read("sym", row_range=[0,2]).data, - pd.DataFrame({"col1": np.array([np.datetime64('NaT'), np.datetime64('NaT')], dtype="datetime64[ns]")}) + pd.DataFrame({"col1": 
np.array([np.datetime64('NaT'), np.datetime64('NaT')], dtype=date_dtype)}) ) assert_frame_equal( lmdb_version_store_static_and_dynamic.read("sym", row_range=[2,5]).data, @@ -120,7 +148,6 @@ def test_date(self, lmdb_version_store_static_and_dynamic): class TestCanAppendEmptyToColumn: - def test_integer(self, lmdb_version_store_static_and_dynamic, int_dtype): df_initial = pd.DataFrame({"col1": np.array([1,2,3], dtype=int_dtype)}) lmdb_version_store_static_and_dynamic.write("sym", df_initial) @@ -191,7 +218,7 @@ def test_string(self, lmdb_version_store_static_and_dynamic): df_with_none ) - def test_date(self, lmdb_version_store_static_and_dynamic): + def test_date(self, lmdb_version_store_static_and_dynamic, date_dtype): df_initial = pd.DataFrame( { "col1": np.array( @@ -202,7 +229,7 @@ def test_date(self, lmdb_version_store_static_and_dynamic): ] ) }, - dtype="datetime64[ns]" + dtype=date_dtype ) lmdb_version_store_static_and_dynamic.write("sym", df_initial) lmdb_version_store_static_and_dynamic.append("sym", pd.DataFrame({"col1": np.array([None, None])})) @@ -216,7 +243,7 @@ def test_date(self, lmdb_version_store_static_and_dynamic): np.datetime64('NaT'), np.datetime64('NaT') ], - dtype="datetime64[ns]") + dtype=date_dtype) } ) assert_frame_equal(lmdb_version_store_static_and_dynamic.read("sym").data, expected_result) @@ -226,22 +253,104 @@ def test_date(self, lmdb_version_store_static_and_dynamic): ) assert_frame_equal( lmdb_version_store_static_and_dynamic.read("sym", row_range=[3,5]).data, - pd.DataFrame({"col1": np.array([np.datetime64('NaT'), np.datetime64('NaT')], dtype="datetime64[ns]")}) + pd.DataFrame({"col1": np.array([np.datetime64('NaT'), np.datetime64('NaT')], dtype=date_dtype)}) ) class TestCanUpdateWithEmpty: + def index(self): + return list(pd.date_range(start="1/1/2024", end="1/4/2024")) + + def update_index(self): + return list(pd.date_range(start="1/2/2024", end="1/3/2024")) + + def test_int(self, lmdb_version_store_static_and_dynamic, int_dtype): + lmdb_version_store_static_and_dynamic.write( + "sym", + pd.DataFrame({"col": [1, 2, 3, 4]}, dtype=int_dtype, index=self.index()) + ) + lmdb_version_store_static_and_dynamic.update( + "sym", + pd.DataFrame({"col": [None, None]}, index=self.update_index()) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym").data, + pd.DataFrame({"col": [1, 0, 0, 4]}, index=self.index(), dtype=int_dtype) + ) + + def test_float(self, lmdb_version_store_static_and_dynamic, float_dtype): + lmdb_version_store_static_and_dynamic.write( + "sym", + pd.DataFrame({"col": [1, 2, 3, 4]}, dtype=float_dtype, index=self.index()) + ) + lmdb_version_store_static_and_dynamic.update( + "sym", + pd.DataFrame({"col": [None, None]}, index=self.update_index()) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym").data, + pd.DataFrame({"col": [1, float("NaN"), float("NaN"), 4]}, index=self.index(), dtype=float_dtype) + ) + def test_bool(self, lmdb_version_store_static_and_dynamic, boolean_dtype): - index = list(pd.date_range(start="1/1/2024", end="1/4/2024")) lmdb_version_store_static_and_dynamic.write( "sym", - pd.DataFrame({"col": [True, True, True, True]}, dtype=boolean_dtype, index=index) + pd.DataFrame({"col": [True, True, True, True]}, dtype=boolean_dtype, index=self.index()) ) lmdb_version_store_static_and_dynamic.update( "sym", - pd.DataFrame({"col": [None, None]}, dtype=boolean_dtype, index=list(pd.date_range(start="1/2/2024", end="1/3/2024"))) + pd.DataFrame({"col": [None, None]}, dtype=boolean_dtype, 
index=self.update_index()) ) assert_frame_equal( lmdb_version_store_static_and_dynamic.read("sym").data, - pd.DataFrame({"col": [True, None, None, True]}, index=index, dtype=boolean_dtype) + pd.DataFrame({"col": [True, None, None, True]}, index=self.index(), dtype=boolean_dtype) + ) + + def test_string(self, lmdb_version_store_static_and_dynamic): + lmdb_version_store_static_and_dynamic.write( + "sym", + pd.DataFrame({"col": ["a", "longstr"*20, "b", "longstr"*20]}, index=self.index()) + ) + lmdb_version_store_static_and_dynamic.update( + "sym", + pd.DataFrame({"col": [None, None]}, index=self.update_index()) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym").data, + pd.DataFrame({"col": ["a", None, None, "longstr"*20]}, index=self.index()) + ) + + def test_date(self, lmdb_version_store_static_and_dynamic, date_dtype): + lmdb_version_store_static_and_dynamic.write( + "sym", + pd.DataFrame( + { + "col": [ + np.datetime64('2005-02'), + np.datetime64('2005-03'), + np.datetime64('2005-04'), + np.datetime64('2005-05') + ] + }, + dtype=date_dtype, + index=self.index() + ) + ) + lmdb_version_store_static_and_dynamic.update( + "sym", + pd.DataFrame({"col": [None, None]}, index=self.update_index()) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym").data, + pd.DataFrame( + { + "col": [ + np.datetime64('2005-02'), + np.datetime64('NaT'), + np.datetime64('NaT'), + np.datetime64('2005-05') + ] + }, + index=self.index() + ) ) From f25b788dffd73cfde9250efb8a5711ecd94c12db Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Tue, 30 Jan 2024 17:16:34 +0200 Subject: [PATCH 09/30] Additional test --- .../unit/arcticdb/version_store/test_empty_column_type.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py index 290ef1cc2d..4618449edc 100644 --- a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py +++ b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py @@ -354,3 +354,11 @@ def test_date(self, lmdb_version_store_static_and_dynamic, date_dtype): index=self.index() ) ) + +class TestEmptyTypesIsOverriden: + def test_cannot_append_different_type_after_first_not_none(self, lmdb_version_store_static_and_dynamic): + lmdb_version_store_static_and_dynamic.write("sym", pd.DataFrame({"col": [None, None]})) + lmdb_version_store_static_and_dynamic.append("sym", pd.DataFrame({"col": [1, 2, 3]})) + lmdb_version_store_static_and_dynamic.append("sym", pd.DataFrame({"col": [None, None]})) + with pytest.raises(Exception): + lmdb_version_store_static_and_dynamic.append("sym", pd.DataFrame({"col": ["some", "string"]})) \ No newline at end of file From 17b642ca52603d75c79f6d713fe42508d32c959c Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Wed, 31 Jan 2024 14:33:04 +0200 Subject: [PATCH 10/30] FWD declare buffer holder --- cpp/arcticdb/entity/frame_and_descriptor.hpp | 1 - cpp/arcticdb/pipeline/read_frame.hpp | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/arcticdb/entity/frame_and_descriptor.hpp b/cpp/arcticdb/entity/frame_and_descriptor.hpp index 15f520fcb2..4a6a19a3f3 100644 --- a/cpp/arcticdb/entity/frame_and_descriptor.hpp +++ b/cpp/arcticdb/entity/frame_and_descriptor.hpp @@ -8,7 +8,6 @@ #pragma once #include -#include #include namespace arcticdb { diff --git a/cpp/arcticdb/pipeline/read_frame.hpp b/cpp/arcticdb/pipeline/read_frame.hpp index 0249c91c4d..d0c28348b8 100644 
--- a/cpp/arcticdb/pipeline/read_frame.hpp +++ b/cpp/arcticdb/pipeline/read_frame.hpp @@ -8,16 +8,18 @@ #pragma once #include -#include #include #include #include -#include #include #include +namespace arcticdb { + struct BufferHolder; +} + namespace arcticdb::pipelines { SegmentInMemory allocate_frame(const std::shared_ptr& context); From 0334f45e1be4cecf3026fb0b0b32b2f9532d86e5 Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Thu, 1 Feb 2024 12:40:31 +0200 Subject: [PATCH 11/30] Add a single function to handle default initialization. --- cpp/arcticdb/codec/codec-inl.hpp | 4 --- cpp/arcticdb/pipeline/read_frame.cpp | 5 ++-- cpp/arcticdb/python/python_handlers.cpp | 33 +++++++++++-------------- cpp/arcticdb/python/python_handlers.hpp | 3 +++ cpp/arcticdb/util/sparse_utils.hpp | 23 +++++++++++------ cpp/arcticdb/util/type_handler.cpp | 6 ++--- cpp/arcticdb/util/type_handler.hpp | 17 +++++++------ 7 files changed, 49 insertions(+), 42 deletions(-) diff --git a/cpp/arcticdb/codec/codec-inl.hpp b/cpp/arcticdb/codec/codec-inl.hpp index a799def672..1c628cb928 100644 --- a/cpp/arcticdb/codec/codec-inl.hpp +++ b/cpp/arcticdb/codec/codec-inl.hpp @@ -18,10 +18,6 @@ #include -#include - -#include - #include namespace arcticdb { diff --git a/cpp/arcticdb/pipeline/read_frame.cpp b/cpp/arcticdb/pipeline/read_frame.cpp index d62a037d17..69bc82820b 100644 --- a/cpp/arcticdb/pipeline/read_frame.cpp +++ b/cpp/arcticdb/pipeline/read_frame.cpp @@ -476,19 +476,20 @@ void decode_into_frame_dynamic( m.source_type_desc_.visit_tag([&](auto src_desc_tag) { using SourceType = typename decltype(src_desc_tag)::DataTypeTag::raw_type; if constexpr (std::is_arithmetic_v && std::is_arithmetic_v) { - const auto src_bytes = sizeof_datatype(m.source_type_desc_) * m.num_rows_; auto dest_ptr = reinterpret_cast(buffer.data() + m.offset_bytes_); if constexpr (is_empty_type(src_desc_tag.data_type())) { + const size_t dest_bytes = m.num_rows_ * sizeof_datatype(m.dest_type_desc_); decode_or_expand( data, reinterpret_cast(dest_ptr), encoded_field, - src_bytes, + dest_bytes, buffers, encdoing_version, m ); } else { + const auto src_bytes = sizeof_datatype(m.source_type_desc_) * m.num_rows_; Buffer tmp_buf{src_bytes}; decode_or_expand( data, diff --git a/cpp/arcticdb/python/python_handlers.cpp b/cpp/arcticdb/python/python_handlers.cpp index 675a8be1c2..fb2450b58d 100644 --- a/cpp/arcticdb/python/python_handlers.cpp +++ b/cpp/arcticdb/python/python_handlers.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace arcticdb { @@ -76,26 +77,8 @@ namespace arcticdb { ); static_assert(get_type_size(DataType::EMPTYVAL) == sizeof(PyObject*)); - // TODO: arcticdb::util::default_initialize does mostly the same thing - // the main difference is that it does not write py::none directly - // * Should use that? - // * It doesn't handle nullable bools and arrays? How should they be handled? 
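+        // Hedged sketch of what the dispatch below amounts to: for a FLOAT64 destination
+        // it is roughly
+        //     std::fill_n(reinterpret_cast<double*>(dest), dest_bytes / sizeof(double),
+        //                 std::numeric_limits<double>::quiet_NaN());
+        // while object-typed destinations are filled with py::none via the registered
+        // type handler's default_initialize.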
m.dest_type_desc_.visit_tag([&](auto tag) { - using RawType = typename decltype(tag)::DataTypeTag::raw_type; - RawType* ptr_dest = reinterpret_cast(dest); - if constexpr (is_sequence_type(tag.data_type())) { - std::fill_n(ptr_dest, m.num_rows_, arcticdb::not_a_string()); - } else if constexpr (is_nullable_type(TypeDescriptor(tag))) { - fill_with_none(reinterpret_cast(dest), m.num_rows_); - } else if constexpr (is_floating_point_type(tag.data_type())) { - std::fill_n(ptr_dest, m.num_rows_, std::numeric_limits::quiet_NaN()); - } else if constexpr (is_integer_type(tag.data_type()) || is_bool_type(tag.data_type())) { - std::memset(dest, 0, sizeof(RawType) * m.num_rows_); - } else if constexpr (is_time_type(tag.data_type())) { - std::fill_n(ptr_dest, m.num_rows_, arcticdb::util::NaT); - } else { - static_assert(sizeof(tag) == 0, "Unhandled data_type"); - } + util::default_initialize(dest, dest_bytes); }); util::variant_match(variant_field, [&input](const auto& field) { @@ -121,6 +104,10 @@ namespace arcticdb { return sizeof(PyObject*); } + void EmptyHandler::default_initialize(void* dest, size_t byte_size) const { + fill_with_none(reinterpret_cast(dest), byte_size / type_size()); + } + void BoolHandler::handle_type( const uint8_t*& data, uint8_t* dest, @@ -167,6 +154,10 @@ namespace arcticdb { return sizeof(PyObject*); } + void BoolHandler::default_initialize(void* dest, size_t byte_size) const { + fill_with_none(reinterpret_cast(dest), byte_size / type_size()); + } + std::mutex ArrayHandler::initialize_array_mutex; void ArrayHandler::handle_type( @@ -241,4 +232,8 @@ namespace arcticdb { int ArrayHandler::type_size() const { return sizeof(PyObject*); } + + void ArrayHandler::default_initialize(void* dest, size_t byte_size) const { + fill_with_none(reinterpret_cast(dest), byte_size / type_size()); + } } diff --git a/cpp/arcticdb/python/python_handlers.hpp b/cpp/arcticdb/python/python_handlers.hpp index ac073ac69d..186c87cbd9 100644 --- a/cpp/arcticdb/python/python_handlers.hpp +++ b/cpp/arcticdb/python/python_handlers.hpp @@ -25,6 +25,7 @@ namespace arcticdb { ); int type_size() const; + void default_initialize(void* dest, size_t byte_size) const; }; struct BoolHandler { @@ -39,6 +40,7 @@ namespace arcticdb { const ColumnMapping& columnMapping ); int type_size() const; + void default_initialize(void* dest, size_t byte_size) const; }; struct DecimalHandler { @@ -66,6 +68,7 @@ namespace arcticdb { const ColumnMapping& columnMapping ); int type_size() const; + void default_initialize(void* dest, size_t byte_size) const; static std::mutex initialize_array_mutex; }; } //namespace arcticdb diff --git a/cpp/arcticdb/util/sparse_utils.hpp b/cpp/arcticdb/util/sparse_utils.hpp index 5b17bcfe5a..a7cf86e391 100644 --- a/cpp/arcticdb/util/sparse_utils.hpp +++ b/cpp/arcticdb/util/sparse_utils.hpp @@ -9,6 +9,7 @@ #include #include +#include #include @@ -16,7 +17,6 @@ #include -#include #include namespace arcticdb::util { @@ -68,11 +68,8 @@ inline void expand_dense_buffer_using_bitmap(const util::BitMagic &bv, const uin } } -// TODO: EmptyHandler::handle_type has similar procedure. -// * Should this be used instead? -// * What about nullable booleans and arrays? 
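+// Default-initializes a raw buffer according to TagType: sequence types get not_a_string(),
+// floating point gets quiet_NaN, time types get NaT, integers and plain bools get zeros,
+// and anything else is delegated to the registered type handler. A hedged usage sketch
+// (illustrative tag alias only):
+//     using F64Tag = ScalarTagType<DataTypeTag<DataType::FLOAT64>>;
+//     util::default_initialize<F64Tag>(ptr, num_rows * sizeof(double));  // NaN-fill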
template -void default_initialize(uint8_t* data, size_t bytes) { +inline void default_initialize(uint8_t* data, size_t bytes) { using RawType = typename TagType::DataTypeTag::raw_type; const auto num_rows ARCTICDB_UNUSED = bytes / sizeof(RawType); constexpr auto data_type = TagType::DataTypeTag::data_type; @@ -80,11 +77,23 @@ void default_initialize(uint8_t* data, size_t bytes) { if constexpr (is_sequence_type(data_type)) { std::fill_n(type_ptr, num_rows, not_a_string()); } else if constexpr (is_floating_point_type(data_type)) { - std::fill_n(type_ptr, num_rows, std::numeric_limits::quiet_NaN()); + std::fill_n(type_ptr, num_rows, std::numeric_limits::quiet_NaN()); } else if constexpr (is_time_type(data_type)) { std::fill_n(type_ptr, num_rows, NaT); + } else if constexpr (is_integer_type(data_type) || is_bool_type(data_type)) { + std::memset(data, 0, bytes); } else { - std::fill_n(data, bytes, 0); + constexpr TypeDescriptor type_descriptor = TagType::type_descriptor(); + const std::shared_ptr& handler = + arcticdb::TypeHandlerRegistry::instance()->get_handler(type_descriptor); + if (handler) { + handler->default_initialize(data, bytes); + } else { + internal::raise( + "Default initialization for {} is not implemented.", + type_descriptor + ); + } } } diff --git a/cpp/arcticdb/util/type_handler.cpp b/cpp/arcticdb/util/type_handler.cpp index 1bf4734e3f..8a3a1d2e41 100644 --- a/cpp/arcticdb/util/type_handler.cpp +++ b/cpp/arcticdb/util/type_handler.cpp @@ -21,16 +21,16 @@ void TypeHandlerRegistry::init() { std::shared_ptr TypeHandlerRegistry::instance_; std::once_flag TypeHandlerRegistry::init_flag_; -std::shared_ptr TypeHandlerRegistry::get_handler(const util::TypeDescriptor& type_descriptor) const { +std::shared_ptr TypeHandlerRegistry::get_handler(const entity::TypeDescriptor& type_descriptor) const { auto it = handlers_.find(type_descriptor); return it == std::end(handlers_) ? std::shared_ptr{} : it->second; } -void TypeHandlerRegistry::register_handler(const util::TypeDescriptor& type_descriptor, TypeHandler&& handler) { +void TypeHandlerRegistry::register_handler(const entity::TypeDescriptor& type_descriptor, TypeHandler&& handler) { handlers_.try_emplace(type_descriptor, std::make_shared(std::move(handler))); } -size_t TypeHandlerRegistry::Hasher::operator()(const util::TypeDescriptor descriptor) const { +size_t TypeHandlerRegistry::Hasher::operator()(const entity::TypeDescriptor descriptor) const { static_assert(sizeof(descriptor) == sizeof(uint16_t), "Cannot compute util::TypeDescriptor's hash. 
The size is wrong.");
    static_assert(sizeof(decltype(descriptor.data_type())) == 1);
    static_assert(sizeof(decltype(descriptor.dimension())) == 1);
diff --git a/cpp/arcticdb/util/type_handler.hpp b/cpp/arcticdb/util/type_handler.hpp
index 8a3d167bcd..87fcb6296f 100644
--- a/cpp/arcticdb/util/type_handler.hpp
+++ b/cpp/arcticdb/util/type_handler.hpp
@@ -8,8 +8,6 @@
 #pragma once

 #include
-#include
-#include
 #include
 #include

@@ -20,6 +18,7 @@
 namespace arcticdb {

+struct BufferHolder;

 struct ITypeHandler {
     template
@@ -53,10 +52,14 @@ struct ITypeHandler {
     int type_size() const {
         return folly::poly_call<1>(*this);
     }
+
+    void default_initialize(void* dest, size_t byte_size) const {
+        folly::poly_call<2>(*this, dest, byte_size);
+    }
 };

     template<class T>
-    using Members = folly::PolyMembers<&T::handle_type, &T::type_size>;
+    using Members = folly::PolyMembers<&T::handle_type, &T::type_size, &T::default_initialize>;
 };

 using TypeHandler = folly::Poly<ITypeHandler>;

@@ -71,13 +74,13 @@ class TypeHandlerRegistry {
     static void init();
     static std::shared_ptr<TypeHandlerRegistry> instance();

-    std::shared_ptr<TypeHandler> get_handler(const util::TypeDescriptor& type_descriptor) const;
-    void register_handler(const util::TypeDescriptor& type_descriptor, TypeHandler&& handler);
+    std::shared_ptr<TypeHandler> get_handler(const entity::TypeDescriptor& type_descriptor) const;
+    void register_handler(const entity::TypeDescriptor& type_descriptor, TypeHandler&& handler);
 private:
     struct Hasher {
-        size_t operator()(const util::TypeDescriptor val) const;
+        size_t operator()(const entity::TypeDescriptor val) const;
     };
-    std::unordered_map<util::TypeDescriptor, std::shared_ptr<TypeHandler>, Hasher> handlers_;
+    std::unordered_map<entity::TypeDescriptor, std::shared_ptr<TypeHandler>, Hasher> handlers_;
 };
 }

From e3d71cbe19579c2ed0d79dcf3b3e67c0bafe6049 Mon Sep 17 00:00:00 2001
From: Vasil Pashov
Date: Thu, 1 Feb 2024 14:45:30 +0200
Subject: [PATCH 12/30] Change pybool type name to a language-agnostic name: bool_object

---
 cpp/arcticdb/column_store/column_utils.hpp     |  5 ++---
 cpp/arcticdb/entity/type_utils.cpp             |  2 +-
 cpp/arcticdb/entity/types-inl.hpp              |  4 ++--
 cpp/arcticdb/entity/types.cpp                  |  2 +-
 cpp/arcticdb/entity/types.hpp                  | 12 ++++++------
 cpp/arcticdb/pipeline/frame_utils.hpp          |  2 +-
 cpp/arcticdb/processing/aggregation.cpp        |  4 ++--
 cpp/arcticdb/python/python_handlers.cpp        |  4 ----
 cpp/arcticdb/python/python_module.cpp          |  2 +-
 cpp/arcticdb/python/python_to_tensor_frame.cpp |  2 +-
 cpp/proto/arcticc/pb2/descriptors.proto        |  2 +-
 11 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/cpp/arcticdb/column_store/column_utils.hpp b/cpp/arcticdb/column_store/column_utils.hpp
index 351617a62b..90a35a1048 100644
--- a/cpp/arcticdb/column_store/column_utils.hpp
+++ b/cpp/arcticdb/column_store/column_utils.hpp
@@ -7,7 +7,6 @@
 #pragma once

-#include
 #include
 #include

@@ -44,7 +43,7 @@ inline py::array array_at(const SegmentInMemory& frame, std::size_t col_pos, py:
             } else {
                 dtype = fmt::format("{}{:d}", get_dtype_specifier(data_type), esize);
             }
-        } else if constexpr (is_empty_type(data_type) || is_py_bool_type(data_type) || is_numpy_array(TypeDescriptor(tag))) {
+        } else if constexpr (is_empty_type(data_type) || is_bool_object_type(data_type) || is_numpy_array(TypeDescriptor(tag))) {
             dtype= "O";
             // The python representation of multidimensional columns differs from the in-memory/on-storage.
In memory, // we hold all scalars in a contiguous buffer with the shapes buffer telling us how many elements are there @@ -94,7 +93,7 @@ inline py::array array_at(const SegmentInMemory& frame, std::size_t col_pos, py: } else { dtype = fmt::format("{}{:d}", get_dtype_specifier(data_type), esize); } - } else if constexpr (is_empty_type(data_type) || is_py_bool_type(data_type) || is_numpy_array(TypeDescriptor(tag))){ + } else if constexpr (is_empty_type(data_type) || is_bool_object_type(data_type) || is_numpy_array(TypeDescriptor(tag))){ dtype = "O"; // The python representation of multidimensional columns differs from the in-memory/on-storage. In memory, // we hold all scalars in a contiguous buffer with the shapes buffer telling us how many elements are there diff --git a/cpp/arcticdb/entity/type_utils.cpp b/cpp/arcticdb/entity/type_utils.cpp index 715939590d..20fbb0cd05 100644 --- a/cpp/arcticdb/entity/type_utils.cpp +++ b/cpp/arcticdb/entity/type_utils.cpp @@ -120,7 +120,7 @@ namespace arcticdb { // Only allow promotion with UTF strings, and only to dynamic (never to fixed width) if (!is_utf_type(source_type) || !is_utf_type(target_type) || !is_dynamic_string_type(target_type)) return std::nullopt; - } else if (is_py_bool_type(source_type)) { + } else if (is_bool_object_type(source_type)) { return std::nullopt; } else { // Non-numeric source type diff --git a/cpp/arcticdb/entity/types-inl.hpp b/cpp/arcticdb/entity/types-inl.hpp index cb16fafbdf..671eee0a1d 100644 --- a/cpp/arcticdb/entity/types-inl.hpp +++ b/cpp/arcticdb/entity/types-inl.hpp @@ -39,7 +39,7 @@ constexpr auto visit_dim(DataType dt, Callable &&c) { DT_CASE(UTF_FIXED64) DT_CASE(UTF_DYNAMIC64) DT_CASE(EMPTYVAL) - DT_CASE(PYBOOL8) + DT_CASE(BOOL_OBJECT8) #undef DT_CASE default: util::raise_rte("Invalid dtype '{}' in visit dim", datatype_to_str(dt)); } @@ -67,7 +67,7 @@ auto visit_type(DataType dt, Callable &&c) { DT_CASE(UTF_FIXED64) DT_CASE(UTF_DYNAMIC64) DT_CASE(EMPTYVAL) - DT_CASE(PYBOOL8) + DT_CASE(BOOL_OBJECT8) #undef DT_CASE default: util::raise_rte("Invalid dtype '{}' in visit type", datatype_to_str(dt)); } diff --git a/cpp/arcticdb/entity/types.cpp b/cpp/arcticdb/entity/types.cpp index 4f7593b23a..a03e45cdfd 100644 --- a/cpp/arcticdb/entity/types.cpp +++ b/cpp/arcticdb/entity/types.cpp @@ -37,7 +37,7 @@ std::string_view datatype_to_str(const DataType dt) { TO_STR(UTF_FIXED64) TO_STR(UTF_DYNAMIC64) TO_STR(EMPTYVAL) - TO_STR(PYBOOL8) + TO_STR(BOOL_OBJECT8) #undef TO_STR default:return std::string_view("UNKNOWN"); } diff --git a/cpp/arcticdb/entity/types.hpp b/cpp/arcticdb/entity/types.hpp index 2a16a30df1..ca062495f1 100644 --- a/cpp/arcticdb/entity/types.hpp +++ b/cpp/arcticdb/entity/types.hpp @@ -128,7 +128,7 @@ enum class ValueType : uint8_t { // When data is appended, the column type is inferred from the data and the column is promoted to the inferred type. EMPTY = 13, /// Nullable booleans - PYBOOL = 14, + BOOL_OBJECT = 14, COUNT // Not a real value type, should not be added to proto descriptor. 
Used to count the number of items in the enum }; @@ -225,7 +225,7 @@ enum class DataType : uint8_t { UTF_FIXED64 = detail::combine_val_bits(ValueType::UTF8_FIXED, SizeBits::S64), UTF_DYNAMIC64 = detail::combine_val_bits(ValueType::UTF_DYNAMIC, SizeBits::S64), EMPTYVAL = detail::combine_val_bits(ValueType::EMPTY, SizeBits::S64), - PYBOOL8 = detail::combine_val_bits(ValueType::PYBOOL, SizeBits::S8), + BOOL_OBJECT8 = detail::combine_val_bits(ValueType::BOOL_OBJECT, SizeBits::S8), UNKNOWN = 0, }; @@ -279,8 +279,8 @@ constexpr bool is_bool_type(DataType dt) { return slice_value_type(dt) == ValueType::BOOL; } -constexpr bool is_py_bool_type(DataType dt) { - return slice_value_type(dt) == ValueType::PYBOOL; +constexpr bool is_bool_object_type(DataType dt) { + return slice_value_type(dt) == ValueType::BOOL_OBJECT; } constexpr bool is_unsigned_type(DataType dt) { @@ -410,7 +410,7 @@ DATA_TYPE_TAG(ASCII_DYNAMIC64, std::uint64_t) DATA_TYPE_TAG(UTF_FIXED64, std::uint64_t) DATA_TYPE_TAG(UTF_DYNAMIC64, std::uint64_t) DATA_TYPE_TAG(EMPTYVAL, std::uint64_t) -DATA_TYPE_TAG(PYBOOL8, uint8_t) +DATA_TYPE_TAG(BOOL_OBJECT8, uint8_t) #undef DATA_TYPE_TAG enum class Dimension : uint8_t { @@ -525,7 +525,7 @@ constexpr bool is_numpy_array(TypeDescriptor td) { } constexpr bool is_pyobject_type(TypeDescriptor td) { - return is_dynamic_string_type(slice_value_type(td.data_type())) || is_py_bool_type(td.data_type()) || + return is_dynamic_string_type(slice_value_type(td.data_type())) || is_bool_object_type(td.data_type()) || is_numpy_array(td); } diff --git a/cpp/arcticdb/pipeline/frame_utils.hpp b/cpp/arcticdb/pipeline/frame_utils.hpp index c764063509..c95e020c1a 100644 --- a/cpp/arcticdb/pipeline/frame_utils.hpp +++ b/cpp/arcticdb/pipeline/frame_utils.hpp @@ -195,7 +195,7 @@ std::optional aggregator_set_data( agg.set_array(col, t); } } - } else if constexpr(is_py_bool_type(dt)) { + } else if constexpr(is_bool_object_type(dt)) { normalization::check(tag.dimension() == Dimension::Dim0, "Multidimensional nullable booleans are not supported"); auto data = const_cast(tensor.data()); diff --git a/cpp/arcticdb/processing/aggregation.cpp b/cpp/arcticdb/processing/aggregation.cpp index 14368b8edf..4bc7ee46c5 100644 --- a/cpp/arcticdb/processing/aggregation.cpp +++ b/cpp/arcticdb/processing/aggregation.cpp @@ -139,8 +139,8 @@ namespace }; template<> - struct OutputType, void> { - using type = ScalarTagType>; + struct OutputType, void> { + using type = ScalarTagType>; }; } diff --git a/cpp/arcticdb/python/python_handlers.cpp b/cpp/arcticdb/python/python_handlers.cpp index fb2450b58d..e17f10274f 100644 --- a/cpp/arcticdb/python/python_handlers.cpp +++ b/cpp/arcticdb/python/python_handlers.cpp @@ -53,10 +53,6 @@ namespace arcticdb { return dest + count; } - constexpr inline bool is_nullable_type(TypeDescriptor td) { - return td.dimension() > Dimension::Dim0 || is_empty_type(td.data_type()) || is_py_bool_type(td.data_type()); - } - void EmptyHandler::handle_type( const uint8_t*& input, uint8_t* dest, diff --git a/cpp/arcticdb/python/python_module.cpp b/cpp/arcticdb/python/python_module.cpp index cc061dc5fe..762a74cb1b 100644 --- a/cpp/arcticdb/python/python_module.cpp +++ b/cpp/arcticdb/python/python_module.cpp @@ -291,7 +291,7 @@ void register_type_handlers() { for(const DataType& data_type : allowed_array_types) { TypeHandlerRegistry::instance()->register_handler(TypeDescriptor{data_type, Dimension::Dim1}, arcticdb::ArrayHandler()); } - TypeHandlerRegistry::instance()->register_handler(TypeDescriptor{DataType::PYBOOL8, 
Dimension::Dim0}, arcticdb::BoolHandler()); + TypeHandlerRegistry::instance()->register_handler(TypeDescriptor{DataType::BOOL_OBJECT8, Dimension::Dim0}, arcticdb::BoolHandler()); } PYBIND11_MODULE(arcticdb_ext, m) { diff --git a/cpp/arcticdb/python/python_to_tensor_frame.cpp b/cpp/arcticdb/python/python_to_tensor_frame.cpp index ba7bf1e4d5..a4be90188c 100644 --- a/cpp/arcticdb/python/python_to_tensor_frame.cpp +++ b/cpp/arcticdb/python/python_to_tensor_frame.cpp @@ -49,7 +49,7 @@ std::variant pystring_to_buffer(PyObject * std::tuple determine_python_object_type(PyObject* obj) { if(is_py_boolean(obj)) - return {ValueType::PYBOOL, 1, 1}; + return {ValueType::BOOL_OBJECT, 1, 1}; return {ValueType::BYTES, 8, 1}; } diff --git a/cpp/proto/arcticc/pb2/descriptors.proto b/cpp/proto/arcticc/pb2/descriptors.proto index 17e0bd0b2a..0dcf769a35 100644 --- a/cpp/proto/arcticc/pb2/descriptors.proto +++ b/cpp/proto/arcticc/pb2/descriptors.proto @@ -31,7 +31,7 @@ message TypeDescriptor { DYNAMIC_STRING = 11; // offset reference to deduplicated string pool DYNAMIC_UTF8 = 12; EMPTY = 13; - PYBOOL = 14; // Nullable boolean + BOOL_OBJECT = 14; // Nullable boolean } enum SizeBits { From 868c60779f24800de20cbd5ddf431ce2f1357c99 Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Thu, 1 Feb 2024 17:06:10 +0200 Subject: [PATCH 13/30] Skip nullable booleans in empty types tests --- .../tests/unit/arcticdb/version_store/test_empty_column_type.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py index 4618449edc..a999dc2eee 100644 --- a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py +++ b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py @@ -24,6 +24,8 @@ def float_dtype(request): # object is for nullable boolean @pytest.fixture(params=["object", "bool"]) def boolean_dtype(request): + if request.param == "object": + pytest.skip("Nullable booleans are temporarily disabled") yield request.param From 5f244cad6ea881a49e46e72ff28223c8a4312e9c Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Thu, 1 Feb 2024 17:56:07 +0200 Subject: [PATCH 14/30] Fix compiler warnings --- cpp/arcticdb/pipeline/write_frame.cpp | 2 +- python/test.py | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) delete mode 100644 python/test.py diff --git a/cpp/arcticdb/pipeline/write_frame.cpp b/cpp/arcticdb/pipeline/write_frame.cpp index 0268cef4f0..e89356295f 100644 --- a/cpp/arcticdb/pipeline/write_frame.cpp +++ b/cpp/arcticdb/pipeline/write_frame.cpp @@ -262,7 +262,7 @@ folly::Future append_frame( return index::write_index(stream::index_type_from_descriptor(frame->desc), std::move(tsd), std::move(slices_to_write), key, store); } else { const FieldCollection& new_fields{index_segment_reader.tsd().fields()}; - for (int i = 0; i < new_fields.size(); ++i) { + for (size_t i = 0; i < new_fields.size(); ++i) { const Field& new_field = new_fields.at(i); TypeDescriptor& original_type = frame->desc.mutable_field(i).mutable_type(); if (is_empty_type(original_type.data_type()) && !is_empty_type(new_field.type().data_type())) { diff --git a/python/test.py b/python/test.py deleted file mode 100644 index 29127d28a3..0000000000 --- a/python/test.py +++ /dev/null @@ -1,7 +0,0 @@ -from arcticdb import Arctic -import pandas as pd -import numpy as np -ac = Arctic("lmdb://test") -l = ac.get_library("test", create_if_missing=True) -df = pd.DataFrame({"col": [1,2,3]}) -l.write("test", df) \ No
newline at end of file From 3d8c784e79890b2c1a7143a442510a3c350f639d Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Fri, 2 Feb 2024 12:19:44 +0200 Subject: [PATCH 15/30] Work on matching compiler flags --- cpp/arcticdb/CMakeLists.txt | 9 ++++++- cpp/arcticdb/codec/zstd.hpp | 35 ++++++++++++++----------- cpp/arcticdb/processing/aggregation.cpp | 12 ++++++--- cpp/arcticdb/python/python_handlers.cpp | 9 +++---- cpp/arcticdb/util/sparse_utils.hpp | 2 +- cpp/arcticdb/util/trace.cpp | 8 ++---- cpp/arcticdb/util/trace.hpp | 6 +---- 7 files changed, 43 insertions(+), 38 deletions(-) diff --git a/cpp/arcticdb/CMakeLists.txt b/cpp/arcticdb/CMakeLists.txt index f647766e07..4466c11a88 100644 --- a/cpp/arcticdb/CMakeLists.txt +++ b/cpp/arcticdb/CMakeLists.txt @@ -157,7 +157,7 @@ ENDIF() set(CMAKE_INSTALL_RPATH "\\$ORIGIN/lib") -if (NOT WIN32) +if (NOT MSVC) if(NOT ${ARCTICDB_USING_CONDA}) # Compilers on conda-forge are relatively strict. # For now, we do not want to treat warnings as errors for those builds. @@ -166,6 +166,13 @@ if (NOT WIN32) add_compile_options("-ggdb") add_compile_options("-fvisibility=hidden") add_compile_options("-fno-omit-frame-pointer") +else() + add_compile_options( + /WX + /wd4250 # Suppress inherits via dominance warning + /we4189 # Unused local variable is an error + /we4100 # Unused function parameter is an error + ) endif () ## Core library without python bindings ## diff --git a/cpp/arcticdb/codec/zstd.hpp b/cpp/arcticdb/codec/zstd.hpp index 712c26543d..4f69ff07d2 100644 --- a/cpp/arcticdb/codec/zstd.hpp +++ b/cpp/arcticdb/codec/zstd.hpp @@ -51,28 +51,31 @@ struct ZstdBlockEncoder { struct ZstdDecoder { -/* - * encoder_version is here to support multiple versions but won't be used before we have them - */ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-parameter" - - template + /// @param[in] encoder_version Used to support multiple versions but won't be used before we have them + template static void decode_block( - std::uint32_t encoder_version, - const std::uint8_t *in, + [[maybe_unused]] std::uint32_t encoder_version, + const std::uint8_t* in, std::size_t in_bytes, - T *t_out, - std::size_t out_bytes) { + T* t_out, + std::size_t out_bytes + ) { const std::size_t decomp_size = ZSTD_getFrameContentSize(in, in_bytes); - codec::check(decomp_size == out_bytes, "expected out_bytes == zstd deduced bytes, actual {} != {}", - out_bytes, decomp_size); + codec::check( + decomp_size == out_bytes, + "expected out_bytes == zstd deduced bytes, actual {} != {}", + out_bytes, + decomp_size + ); std::size_t real_decomp = ZSTD_decompress(t_out, out_bytes, in, in_bytes); - codec::check(real_decomp == out_bytes, "expected out_bytes == zstd decompressed bytes, actual {} != {}", - out_bytes, real_decomp); + codec::check( + real_decomp == out_bytes, + "expected out_bytes == zstd decompressed bytes, actual {} != {}", + out_bytes, + real_decomp + ); } -#pragma GCC diagnostic pop }; } // namespace arcticdb::detail diff --git a/cpp/arcticdb/processing/aggregation.cpp b/cpp/arcticdb/processing/aggregation.cpp index 4bc7ee46c5..87905011a3 100644 --- a/cpp/arcticdb/processing/aggregation.cpp +++ b/cpp/arcticdb/processing/aggregation.cpp @@ -173,13 +173,13 @@ void SumAggregatorData::aggregate(const std::optional& input_ if (!data_type_.has_value() || *data_type_ == DataType::EMPTYVAL) { data_type_ = DataType::FLOAT64; } - entity::details::visit_type(*data_type_, [&input_column, unique_values, &groups, that=this] (auto global_type_desc_tag) { + 
entity::details::visit_type(*data_type_, [&input_column, unique_values, &groups, this] (auto global_type_desc_tag) { using GlobalInputType = decltype(global_type_desc_tag); if constexpr(!is_sequence_type(GlobalInputType::DataTypeTag::data_type)) { using GlobalTypeDescriptorTag = typename OutputType::type; using GlobalRawType = typename GlobalTypeDescriptorTag::DataTypeTag::raw_type; - that->aggregated_.resize(sizeof(GlobalRawType)* unique_values); - auto out_ptr = reinterpret_cast(that->aggregated_.data()); + aggregated_.resize(sizeof(GlobalRawType)* unique_values); + auto out_ptr = reinterpret_cast(aggregated_.data()); if (input_column.has_value()) { entity::details::visit_type(input_column->column_->type().data_type(), [&input_column, &groups, &out_ptr] (auto type_desc_tag) { using ColumnTagType = std::decay_t; @@ -190,7 +190,11 @@ void SumAggregatorData::aggregate(const std::optional& input_ while (auto block = col_data.next>>()) { auto ptr = reinterpret_cast(block.value().data()); for (auto i = 0u; i < block.value().row_count(); ++i, ++ptr, ++iter) { - out_ptr[groups[deref(iter)]] += GlobalRawType(*ptr); + if constexpr (std::is_same_v) { + out_ptr[groups[deref(iter)]] |= GlobalRawType(*ptr); + } else { + out_ptr[groups[deref(iter)]] += GlobalRawType(*ptr); + } } } }; diff --git a/cpp/arcticdb/python/python_handlers.cpp b/cpp/arcticdb/python/python_handlers.cpp index e17f10274f..4e9aca7f33 100644 --- a/cpp/arcticdb/python/python_handlers.cpp +++ b/cpp/arcticdb/python/python_handlers.cpp @@ -108,7 +108,7 @@ namespace arcticdb { const uint8_t*& data, uint8_t* dest, const VariantField& encoded_field_info, - size_t dest_bytes, + size_t, std::shared_ptr, EncodingVersion encding_version, const ColumnMapping& m @@ -160,7 +160,7 @@ namespace arcticdb { const uint8_t*& data, uint8_t* dest, const VariantField& encoded_field_info, - size_t dest_bytes, + size_t, std::shared_ptr buffers, EncodingVersion encoding_version, const ColumnMapping& m @@ -168,12 +168,11 @@ namespace arcticdb { util::variant_match(encoded_field_info, [&](auto field){ ARCTICDB_SAMPLE(HandleArray, 0) util::check(field->has_ndarray(), "Expected ndarray in array object handler"); - const auto row_count = dest_bytes / sizeof(PyObject*); auto ptr_dest = reinterpret_cast(dest); if(!field->ndarray().sparse_map_bytes()) { log::version().info("Array handler has no values"); - fill_with_none(ptr_dest, row_count); + fill_with_none(ptr_dest, m.num_rows_); return; } std::shared_ptr column = buffers->get_buffer(m.source_type_desc_, true); @@ -221,7 +220,7 @@ namespace arcticdb { }); ARCTICDB_SUBSAMPLE(ArrayIncNones, 0) - fill_with_none(ptr_dest, row_count - last_row); + fill_with_none(ptr_dest, m.num_rows_ - last_row); }); } diff --git a/cpp/arcticdb/util/sparse_utils.hpp b/cpp/arcticdb/util/sparse_utils.hpp index a020dd2abb..13bb5a78b0 100644 --- a/cpp/arcticdb/util/sparse_utils.hpp +++ b/cpp/arcticdb/util/sparse_utils.hpp @@ -107,7 +107,7 @@ ChunkedBuffer scan_floating_point_to_sparse( util::BitMagic& block_bitset) { auto scan_ptr = ptr; for (size_t idx = 0; idx < rows_to_write; ++idx, ++scan_ptr) { - block_bitset[bv_size(idx)] = isnan(*scan_ptr) <= 0; + block_bitset[bv_size(idx)] = !isnan(*scan_ptr); } const auto bytes = block_bitset.count() * sizeof(RawType); diff --git a/cpp/arcticdb/util/trace.cpp b/cpp/arcticdb/util/trace.cpp index 41f14e050d..5b1c6991a0 100644 --- a/cpp/arcticdb/util/trace.cpp +++ b/cpp/arcticdb/util/trace.cpp @@ -11,20 +11,16 @@ #include #endif -#include - namespace arcticdb { - std::string get_type_name(const 
std::type_info & ti){ + std::string get_type_name(const std::type_info& ti){ #ifndef _WIN32 char* demangled = abi::__cxa_demangle(ti.name(), nullptr, nullptr, nullptr); std::string ret = demangled; free(demangled); return ret; #else - //TODO: Implement get_name_type for windows - return std::string(""); + return ti.name(); #endif } - } diff --git a/cpp/arcticdb/util/trace.hpp b/cpp/arcticdb/util/trace.hpp index 17bfbf7ed0..146c9c9c91 100644 --- a/cpp/arcticdb/util/trace.hpp +++ b/cpp/arcticdb/util/trace.hpp @@ -8,14 +8,10 @@ #pragma once #include -#include -#include -#include -#include #include namespace arcticdb { -std::string get_type_name(const std::type_info & ); +std::string get_type_name(const std::type_info& type_info); } From 8d0484ad8247c8af2ed29ae850885dfc7cc0209e Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Fri, 2 Feb 2024 19:42:00 +0200 Subject: [PATCH 16/30] Add tests for updating an empty column --- cpp/arcticdb/codec/codec.cpp | 20 ++- cpp/arcticdb/column_store/column.cpp | 126 +++++++++------ cpp/arcticdb/column_store/column.hpp | 22 +-- .../version_store/test_empty_column_type.py | 151 +++++++++++++++++- 4 files changed, 257 insertions(+), 62 deletions(-) diff --git a/cpp/arcticdb/codec/codec.cpp b/cpp/arcticdb/codec/codec.cpp index 4d0edd4e8d..395479502c 100644 --- a/cpp/arcticdb/codec/codec.cpp +++ b/cpp/arcticdb/codec/codec.cpp @@ -389,14 +389,26 @@ void decode_v1( for (int i = 0; i < fields_size; ++i) { const auto& field = hdr.fields(i); const auto& field_name = desc.fields(i).name(); - util::check(data!=end, "Reached end of input block with {} fields to decode", fields_size-i); - if(auto col_index = res.column_index(field_name)) { + if (auto col_index = res.column_index(field_name)) { auto& col = res.column(static_cast(*col_index)); - data += decode_field(res.field(*col_index).type(), field, data, col, col.opt_sparse_map(), to_encoding_version(hdr.encoding_version())); + util::check( + data != end || is_empty_type(col.type().data_type()), + "Reached end of input block with {} fields to decode", + fields_size - i + ); + data += decode_field( + res.field(*col_index).type(), + field, + data, + col, + col.opt_sparse_map(), + to_encoding_version(hdr.encoding_version()) + ); } else { + util::check(data != end, "Reached end of input block with {} fields to decode", fields_size - i); data += encoding_sizes::field_compressed_size(field); } - ARCTICDB_TRACE(log::codec(), "Decoded column {} to position {}", i, data-begin); + ARCTICDB_TRACE(log::codec(), "Decoded column {} to position {}", i, data - begin); } decode_string_pool(hdr, data, begin, end, res); diff --git a/cpp/arcticdb/column_store/column.cpp b/cpp/arcticdb/column_store/column.cpp index 3be9d17f0c..c5ccc192ba 100644 --- a/cpp/arcticdb/column_store/column.cpp +++ b/cpp/arcticdb/column_store/column.cpp @@ -512,63 +512,89 @@ std::vector> Column::split(const std::shared_ptr return output; } -std::shared_ptr Column::truncate(const std::shared_ptr& column, size_t start_row, size_t end_row) { - auto type_size = get_type_size(column->type().data_type()); - // Bytes from the underlying chunked buffer to include. 
Inclusive of start_byte, exclusive of end_byte - size_t start_byte; - size_t end_byte; - util::BitMagic input_sparse_map; - if (column->is_sparse()) { - input_sparse_map = column->sparse_map(); - internal::check(input_sparse_map.size() > 0, "Unexpected empty sparse map in Column::truncate"); - // Sparse columns do not include trailing 0s in the bitset, so the relevant end_row is capped at the size of the biset +/// Bytes from the underlying chunked buffer to include when truncating. Inclusive of start_byte, exclusive of end_byte +static [[nodiscard]] std::pair column_start_end_bytes( + const Column& column, + size_t start_row, + size_t end_row +) { + const size_t type_size = get_type_size(column.type().data_type()); + size_t start_byte = start_row * type_size; + size_t end_byte = end_row * type_size; + if (column.is_sparse()) { + const util::BitMagic& input_sparse_map = column.sparse_map(); + internal::check( + input_sparse_map.size() > 0, + "Unexpected empty sparse map in Column::truncate" + ); + // Sparse columns do not include trailing 0s in the bitset, so the relevant end_row is capped at the size of the + // bitset end_row = std::min(end_row, static_cast(input_sparse_map.size())); // count_range is inclusive at both ends - auto set_bits_before_range = start_row > 0 ? input_sparse_map.count_range(0, start_row - 1) : 0; - auto set_bits_in_range = input_sparse_map.count_range(start_row, end_row - 1); + const size_t set_bits_before_range = start_row > 0 ? input_sparse_map.count_range(0, start_row - 1) : 0; + const size_t set_bits_in_range = input_sparse_map.count_range(start_row, end_row - 1); start_byte = set_bits_before_range * type_size; end_byte = start_byte + (set_bits_in_range * type_size); - } else { - start_byte = start_row * type_size; - end_byte = end_row * type_size; } internal::check( - start_byte % type_size == 0, - "start_byte {} is not a multiple of type size {}", start_byte, column->type()); + start_byte % type_size == 0, + "start_byte {} is not a multiple of type size {}", + start_byte, + column.type() + ); internal::check( - end_byte % type_size == 0, - "start_byte {} is not a multiple of type size {}", end_byte, column->type()); + end_byte % type_size == 0, + "end_byte {} is not a multiple of type size {}", + end_byte, + column.type() + ); + return {start_byte, end_byte}; +} + +static [[nodiscard]] util::BitMagic truncate_sparse_map( + const util::BitMagic& input_sparse_map, + size_t start_row, + size_t end_row +) { + // The output sparse map is the slice [start_row, end_row) of the input sparse map + // BitMagic doesn't have a method for this, so hand-roll it here + // Ctor parameter is the size + util::BitMagic output_sparse_map(end_row - start_row); + util::BitSet::bulk_insert_iterator inserter(output_sparse_map); + util::BitSetSizeType set_input_bit; + if (start_row == 0) { + // get_first can return 0 if no bits are set, but we checked earlier that input_sparse_map.size() > 0, + // and we do not have sparse maps with no bits set (except for the empty type) + set_input_bit = input_sparse_map.get_first(); + if (set_input_bit < end_row) { + inserter = set_input_bit; + } + } else { + set_input_bit = input_sparse_map.get_next(start_row - 1); + // get_next returns 0 if no more bits are set + if (set_input_bit != 0 && set_input_bit < end_row) { + // Shift start_row elements to the left + inserter = set_input_bit - start_row; + } + } + do { + set_input_bit = input_sparse_map.get_next(set_input_bit); + if (set_input_bit != 0 && set_input_bit < end_row) { + inserter =
set_input_bit - start_row; + } + } while (set_input_bit != 0); + inserter.flush(); + return output_sparse_map; +} + +std::shared_ptr Column::truncate(const std::shared_ptr& column, size_t start_row, size_t end_row) { + const auto [start_byte, end_byte] = column_start_end_bytes(*column, start_row, end_row); auto buffer = ::arcticdb::truncate(column->data_.buffer(), start_byte, end_byte); auto res = std::make_shared(column->type(), column->allow_sparse_, std::move(buffer)); if (column->is_sparse()) { - // The output sparse map is the slice [start_row, end_row) of the input sparse map - // BitMagic doesn't have a method for this, so hand-roll it here - // Ctor parameter is the size - util::BitMagic output_sparse_map(end_row - start_row); - util::BitSet::bulk_insert_iterator inserter(output_sparse_map); - util::BitSetSizeType set_input_bit; - if (start_row == 0) { - // get_first can return 0 if no bits are set, but we checked earlier that input_sparse_map.size() > 0, - // and we do not have sparse maps with no bits set - set_input_bit = input_sparse_map.get_first(); - if (set_input_bit < end_row) { - inserter = set_input_bit; - } - } else { - set_input_bit = input_sparse_map.get_next(start_row - 1); - // get_next returns 0 if no more bits are set - if (set_input_bit != 0 && set_input_bit < end_row) { - // Shift start_row elements to the left - inserter = set_input_bit - start_row; - } - } - do { - set_input_bit = input_sparse_map.get_next(set_input_bit); - if (set_input_bit != 0 && set_input_bit < end_row) { - inserter = set_input_bit - start_row; - } - } while (set_input_bit != 0); - inserter.flush(); + util::BitMagic output_sparse_map = is_empty_type(column->type().data_type()) + ? util::BitMagic{} + : truncate_sparse_map(column->sparse_map(), start_row, end_row); res->set_sparse_map(std::move(output_sparse_map)); } res->set_row_data(end_row - (start_row + 1)); @@ -672,7 +698,7 @@ void Column::regenerate_offsets() const { } } -[[nodiscard]] util::BitMagic& Column::sparse_map() { +util::BitMagic& Column::sparse_map() { if(!sparse_map_) sparse_map_ = std::make_optional(0); @@ -684,4 +710,8 @@ const util::BitMagic& Column::sparse_map() const { return sparse_map_.value(); } +std::optional& Column::opt_sparse_map() { + return sparse_map_; +} + } //namespace arcticdb diff --git a/cpp/arcticdb/column_store/column.hpp b/cpp/arcticdb/column_store/column.hpp index 9ca1d62712..03bcf65721 100644 --- a/cpp/arcticdb/column_store/column.hpp +++ b/cpp/arcticdb/column_store/column.hpp @@ -239,9 +239,9 @@ class Column { sparse_map().set_range(0, bv_size(to_row), true); } - std::optional& opt_sparse_map() { - return sparse_map_; - } + [[nodiscard]] util::BitMagic& sparse_map(); + [[nodiscard]] const util::BitMagic& sparse_map() const; + [[nodiscard]] std::optional& opt_sparse_map(); template auto begin() const { @@ -646,11 +646,18 @@ class Column { return *res; } - static std::vector> split(const std::shared_ptr& column, size_t num_rows); - /// @biref Produces a new column containing only the data in range [start_row, end_row) + [[nodiscard]] static std::vector> split( + const std::shared_ptr& column, + size_t num_rows + ); + /// @brief Produces a new column containing only the data in range [start_row, end_row) /// @param[in] start_row Inclusive start of the row range /// @param[in] end_row Exclusive end of the row range - static std::shared_ptr truncate(const std::shared_ptr& column, size_t start_row, size_t end_row); + [[nodiscard]] static std::shared_ptr truncate( + const std::shared_ptr& column, + size_t 
start_row, + size_t end_row + ); private: @@ -670,9 +677,6 @@ class Column { // Permutes the physical column storage based on the given sorted_pos. void physical_sort_external(std::vector &&sorted_pos); - [[nodiscard]] util::BitMagic& sparse_map(); - const util::BitMagic& sparse_map() const; - // Members CursoredBuffer data_; CursoredBuffer shapes_; diff --git a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py index a999dc2eee..58cccc2cb4 100644 --- a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py +++ b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py @@ -37,7 +37,7 @@ def date_dtype(request): class TestCanAppendToEmptyColumn: @pytest.fixture(autouse=True) def create_empty_column(self, lmdb_version_store_static_and_dynamic): - lmdb_version_store_static_and_dynamic.write("sym", pd.DataFrame({"col1": [None, None]})) + lmdb_version_store_static_and_dynamic.write("sym", pd.DataFrame({"col1": 2 * [None]})) yield def test_integer(self, lmdb_version_store_static_and_dynamic, int_dtype): @@ -357,6 +357,155 @@ def test_date(self, lmdb_version_store_static_and_dynamic, date_dtype): ) ) + +class TestCanUpdateEmpty: + """ + Check that a column containing only empty type and date index can be updated and that + the type of the column will be changed to the type of the new data. The update range + is right in the middle of the initial date range, so there will be values unaffected + by the update. Check that on read the type of the unaffected entries will be changed + from empty to the new type of the column. + """ + + + def index(self): + return list(pd.date_range(start="1/1/2024", end="1/4/2024")) + + def update_index(self): + return list(pd.date_range(start="1/2/2024", end="1/3/2024")) + + @pytest.fixture(autouse=True) + def create_empty_column(self, lmdb_version_store_static_and_dynamic): + lmdb_version_store_static_and_dynamic.write("sym", pd.DataFrame({"col": 4 * [None]}, index=self.index())) + yield + + def test_integer(self, lmdb_version_store_static_and_dynamic, int_dtype): + lmdb_version_store_static_and_dynamic.update( + "sym", + pd.DataFrame({"col": [1, 2]}, dtype=int_dtype, index=self.update_index()) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym").data, + pd.DataFrame({"col": [0,1,2,0]}, dtype=int_dtype, index=self.index()) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read( + "sym", + date_range=(pd.to_datetime("1/1/2024"), pd.to_datetime("1/1/2024")) + ).data, + pd.DataFrame({"col": [0]}, dtype=int_dtype, index=[pd.to_datetime("1/1/2024")]) + ) + + def test_float(self, lmdb_version_store_static_and_dynamic, float_dtype): + lmdb_version_store_static_and_dynamic.update( + "sym", + pd.DataFrame({"col": [1, 2]}, dtype=float_dtype, index=self.update_index()) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym").data, + pd.DataFrame( + {"col": [float("NaN"), 1, 2, float("NaN")]}, + dtype=float_dtype, + index=self.index() + ) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read( + "sym", + date_range=(pd.to_datetime("1/1/2024"), pd.to_datetime("1/1/2024")) + ).data, + pd.DataFrame({"col": [float("NaN")]}, dtype=float_dtype, index=[pd.to_datetime("1/1/2024")]) + ) + + def test_bool(self, lmdb_version_store_static_and_dynamic, boolean_dtype): + lmdb_version_store_static_and_dynamic.update( + "sym", + pd.DataFrame({"col": [True, False]}, dtype=boolean_dtype, index=self.update_index()) 
+ ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym").data, + pd.DataFrame( + {"col": [None, True, False, None]}, + dtype=boolean_dtype, + index=self.index() + ) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read( + "sym", + date_range=(pd.to_datetime("1/1/2024"), pd.to_datetime("1/1/2024")) + ).data, + pd.DataFrame({"col": [None]}, dtype=boolean_dtype, index=[pd.to_datetime("1/1/2024")]) + ) + + def test_string(self, lmdb_version_store_static_and_dynamic): + lmdb_version_store_static_and_dynamic.update( + "sym", + pd.DataFrame({"col": ["a", 20*"long_string"]}, index=self.update_index()) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym").data, + pd.DataFrame( + {"col": [None, "a", 20*"long_string", None]}, + index=self.index() + ) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read( + "sym", + date_range=(pd.to_datetime("1/1/2024"), pd.to_datetime("1/1/2024")) + ).data, + pd.DataFrame({"col": [None]}, index=[pd.to_datetime("1/1/2024")]) + ) + + def test_empty(self, lmdb_version_store_static_and_dynamic): + lmdb_version_store_static_and_dynamic.update( + "sym", + pd.DataFrame({"col": 2*[None]}, index=self.update_index()) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym").data, + pd.DataFrame({"col": 4*[None]}, index=self.index()) + ) + + def test_date(self, lmdb_version_store_static_and_dynamic, date_dtype): + lmdb_version_store_static_and_dynamic.update( + "sym", + pd.DataFrame( + {"col": [np.datetime64('2005-02'), np.datetime64('2005-03')]}, + dtype=date_dtype, + index=self.update_index() + ) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read("sym").data, + pd.DataFrame( + { + "col": [ + np.datetime64('NaT'), + np.datetime64('2005-02'), + np.datetime64('2005-03'), + np.datetime64('NaT') + ] + }, + index=self.index(), + dtype=date_dtype + ) + ) + assert_frame_equal( + lmdb_version_store_static_and_dynamic.read( + "sym", + date_range=(pd.to_datetime("1/1/2024"), pd.to_datetime("1/1/2024")) + ).data, + pd.DataFrame( + { + "col": [np.datetime64('NaT', 'ns')] + }, + index=[pd.to_datetime("1/1/2024")], + dtype=date_dtype + ) + ) + class TestEmptyTypesIsOverriden: def test_cannot_append_different_type_after_first_not_none(self, lmdb_version_store_static_and_dynamic): lmdb_version_store_static_and_dynamic.write("sym", pd.DataFrame({"col": [None, None]})) From 634347274431977ff79864894fca4b329b2886ca Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Fri, 2 Feb 2024 20:35:03 +0200 Subject: [PATCH 17/30] Fix conda builds --- cpp/arcticdb/column_store/column.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/arcticdb/column_store/column.cpp b/cpp/arcticdb/column_store/column.cpp index c5ccc192ba..e3e7102973 100644 --- a/cpp/arcticdb/column_store/column.cpp +++ b/cpp/arcticdb/column_store/column.cpp @@ -513,7 +513,7 @@ std::vector> Column::split(const std::shared_ptr } /// Bytes from the underlying chunked buffer to include when truncating. 
Inclusive of start_byte, exclusive of end_byte -static [[nodiscard]] std::pair column_start_end_bytes( +[[nodiscard]] static std::pair column_start_end_bytes( const Column& column, size_t start_row, size_t end_row @@ -551,7 +551,7 @@ static [[nodiscard]] std::pair column_start_end_bytes( return {start_byte, end_byte}; } -static [[nodiscard]] util::BitMagic truncate_sparse_map( +[[nodiscard]] static util::BitMagic truncate_sparse_map( const util::BitMagic& input_sparse_map, size_t start_row, size_t end_row From 0a36fe692dd7e1871fdbbdbad61bc1e079315af9 Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Mon, 5 Feb 2024 11:30:13 +0200 Subject: [PATCH 18/30] Fix MSVC release builds --- cpp/arcticdb/storage/library.hpp | 14 ++++++++++---- cpp/arcticdb/storage/lmdb/lmdb_storage.cpp | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/cpp/arcticdb/storage/library.hpp b/cpp/arcticdb/storage/library.hpp index 05e339e8c8..0dfea0c59d 100644 --- a/cpp/arcticdb/storage/library.hpp +++ b/cpp/arcticdb/storage/library.hpp @@ -72,8 +72,11 @@ class Library { if (open_mode() < OpenMode::WRITE) throw PermissionException(library_path_, open_mode(), "write"); - size_t ARCTICDB_UNUSED total_size = kvs.fold([] (size_t s, const KeySegmentPair& seg) { return s + seg.segment().total_segment_size(); }, size_t(0)); - auto kv_count ARCTICDB_UNUSED = kvs.size(); + [[maybe_unused]] const size_t total_size = kvs.fold( + [](size_t s, const KeySegmentPair& seg) { return s + seg.segment().total_segment_size(); }, + size_t(0) + ); + [[maybe_unused]] const auto kv_count = kvs.size(); storages_->write(std::move(kvs)); ARCTICDB_TRACE(log::storage(), "{} kv written, {} bytes", kv_count, total_size); } @@ -83,8 +86,11 @@ class Library { if (open_mode() < OpenMode::WRITE) throw PermissionException(library_path_, open_mode(), "update"); - size_t total_size ARCTICDB_UNUSED = kvs.fold([] (size_t s, const KeySegmentPair& seg) { return s + seg.segment().total_segment_size(); }, size_t(0)); - auto kv_count ARCTICDB_UNUSED = kvs.size(); + [[maybe_unused]] const size_t total_size = kvs.fold( + [](size_t s, const KeySegmentPair& seg) { return s + seg.segment().total_segment_size(); }, + size_t(0) + ); + [[maybe_unused]] const auto kv_count = kvs.size(); storages_->update(std::move(kvs), opts); ARCTICDB_TRACE(log::storage(), "{} kv updated, {} bytes", kv_count, total_size); } diff --git a/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp b/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp index 30c86ee873..dbf6b91000 100644 --- a/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp +++ b/cpp/arcticdb/storage/lmdb/lmdb_storage.cpp @@ -153,7 +153,7 @@ bool LmdbStorage::do_key_exists(const VariantKey&key) { MDB_val mdb_key{stored_key.size(), stored_key.data()}; MDB_val mdb_val; return ::lmdb::dbi_get(txn, dbi.handle(), &mdb_key, &mdb_val); - } catch (const ::lmdb::not_found_error &ex) { + } catch ([[maybe_unused]] const ::lmdb::not_found_error &ex) { ARCTICDB_DEBUG(log::storage(), "Caught lmdb not found error: {}", ex.what()); return false; } From 093e4f620540f34b3626be7382c79fda01bc6161 Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Mon, 5 Feb 2024 12:31:55 +0200 Subject: [PATCH 19/30] More MSVC compilation error fixes --- cpp/arcticdb/storage/s3/mock_s3_client.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/arcticdb/storage/s3/mock_s3_client.cpp b/cpp/arcticdb/storage/s3/mock_s3_client.cpp index 31eeb1fefa..6b71c6be93 100644 --- a/cpp/arcticdb/storage/s3/mock_s3_client.cpp +++ b/cpp/arcticdb/storage/s3/mock_s3_client.cpp 
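The MSVC-related fixes in patches 18 and 19 rely on two idioms: values consumed only by trace/logging macros (which may compile to nothing in release builds) are annotated [[maybe_unused]], and catch clauses that only need to know the exception type leave the parameter unnamed. A minimal standalone sketch of both idioms, with TRACE as a hypothetical stand-in for a logging macro that expands to nothing (not part of the patch):

    #include <cstddef>
    #include <exception>
    #include <string>

    // Hypothetical stand-in for a logging macro that compiles away in release builds.
    #define TRACE(x) ((void)0)

    int parse_or_zero(const std::string& s) {
        [[maybe_unused]] const std::size_t len = s.size();  // read only by the disabled TRACE macro
        TRACE(len);
        try {
            return std::stoi(s);
        } catch (const std::exception&) {  // unnamed parameter: only the type matters here
            return 0;
        }
    }

With warnings promoted to errors (/WX, /we4189, /we4100), both idioms keep debug-only values legal without #pragma suppression blocks like the one removed from zstd.hpp.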
@@ -50,7 +50,7 @@ std::optional has_failure_trigger(const std::string& s3_object auto failure_code_string = s3_object_name.substr(position + failure_string_for_operation.size()); auto failure_code = Aws::S3::S3Errors(std::stoi(failure_code_string)); return Aws::S3::S3Error(Aws::Client::AWSError(failure_code, "Simulated error", "Simulated error message", true)); - } catch (std::exception& e) { + } catch (std::exception&) { return std::nullopt; } } From ff4bcdd142cf02011478a3727364647cac4b84e9 Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Mon, 5 Feb 2024 16:37:08 +0200 Subject: [PATCH 20/30] Fix compilation error in MSVC for C++ unit tests --- .../column_store/test/test_memory_segment.cpp | 21 ++++++++++++------- cpp/arcticdb/pipeline/test/test_pipeline.cpp | 4 +++- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/cpp/arcticdb/column_store/test/test_memory_segment.cpp b/cpp/arcticdb/column_store/test/test_memory_segment.cpp index 6782993c5f..3416242e75 100644 --- a/cpp/arcticdb/column_store/test/test_memory_segment.cpp +++ b/cpp/arcticdb/column_store/test/test_memory_segment.cpp @@ -17,6 +17,7 @@ #include #include +#include TEST(MemSegment, Empty) { using namespace arcticdb; @@ -138,9 +139,9 @@ TEST(MemSegment, IterateWithEmptyTypeColumn) { for (auto&& [idx, row]: folly::enumerate(seg)) { ASSERT_EQ(static_cast(idx), row.scalar_at(0)); // Exception should be thrown regardless of the type requested for empty type columns - EXPECT_THROW(row.scalar_at(1).has_value(), InternalException); - EXPECT_THROW(row.scalar_at(1).has_value(), InternalException); - EXPECT_THROW(row.scalar_at(1).has_value(), InternalException); + EXPECT_THROW([[maybe_unused]] auto v = row.scalar_at(1).has_value(), InternalException); + EXPECT_THROW([[maybe_unused]] auto v = row.scalar_at(1).has_value(), InternalException); + EXPECT_THROW([[maybe_unused]] auto v = row.scalar_at(1).has_value(), InternalException); } } @@ -176,7 +177,13 @@ TEST(MemSegment, ModifyViaIterator) { auto &segment = frame_wrapper.segment_; for (auto &row : segment) { for (auto &value : row) { - value.visit([](auto &v) { v += 1; }); + value.visit([](auto& v) { + if constexpr (std::is_same_v>, bool>) { + v |= 1; + } else { + v += 1; + } + }); } } @@ -479,9 +486,9 @@ TEST(MemSegment, Filter) { for (auto&& [idx, row]: folly::enumerate(filtered_seg)) { ASSERT_EQ(static_cast(retained_rows[idx]), row.scalar_at(0)); // Exception should be thrown regardless of the type requested for empty type columns - EXPECT_THROW(row.scalar_at(1).has_value(), InternalException); - EXPECT_THROW(row.scalar_at(1).has_value(), InternalException); - EXPECT_THROW(row.scalar_at(1).has_value(), InternalException); + EXPECT_THROW([[maybe_unused]] auto v = row.scalar_at(1).has_value(), InternalException); + EXPECT_THROW([[maybe_unused]] auto v = row.scalar_at(1).has_value(), InternalException); + EXPECT_THROW([[maybe_unused]] auto v = row.scalar_at(1).has_value(), InternalException); } } diff --git a/cpp/arcticdb/pipeline/test/test_pipeline.cpp b/cpp/arcticdb/pipeline/test/test_pipeline.cpp index ac3799d929..0c263c068d 100644 --- a/cpp/arcticdb/pipeline/test/test_pipeline.cpp +++ b/cpp/arcticdb/pipeline/test/test_pipeline.cpp @@ -182,7 +182,9 @@ TEST(Pipeline, Basic) { Pipeline pipeline(ex); TestFilter even_filter{[](const SegmentInMemory::Row& row) { return row[0].visit([](auto val) { - if constexpr(std::is_integral_v) + if constexpr (std::is_same_v) + return val == false; + else if constexpr(std::is_integral_v) return val % 2 == 0; else return false; From 
c2ef8529d9d01ccf371d910c818d0f1df6e6c34b Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Fri, 9 Feb 2024 16:46:47 +0200 Subject: [PATCH 21/30] Fix rewrite partial segment so that it doesn't try to truncate with empty ranges --- cpp/arcticdb/pipeline/write_frame.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/arcticdb/pipeline/write_frame.cpp b/cpp/arcticdb/pipeline/write_frame.cpp index e89356295f..d91facbc09 100644 --- a/cpp/arcticdb/pipeline/write_frame.cpp +++ b/cpp/arcticdb/pipeline/write_frame.cpp @@ -292,7 +292,7 @@ static RowRange partial_rewrite_row_range( return {0, bound->row_pos()}; } else { const timestamp end = std::get(range.end_); - auto bound = std::upper_bound(std::begin(segment), std::end(segment), end, [](timestamp t, auto& row) { + auto bound = std::upper_bound(std::begin(segment), std::end(segment), end, [](timestamp t, const auto& row) { return t < row.template index(); }); return {bound->row_pos(), segment.row_count()}; } } @@ -339,7 +339,10 @@ std::optional rewrite_partial_segment( const SegmentInMemory& segment = kv.second; const IndexRange affected_index_range = partial_rewrite_index_range(existing_range, index_range, affected_part); const RowRange affected_row_range = partial_rewrite_row_range(segment, index_range, affected_part); - const size_t num_rows = affected_row_range.end() - affected_row_range.start(); + const int64_t num_rows = affected_row_range.end() - affected_row_range.start(); + if (num_rows <= 0) { + return std::nullopt; + } SegmentInMemory output = segment.truncate(affected_row_range.start(), affected_row_range.end(), true); FrameSlice new_slice{ std::make_shared(output.descriptor()), From c6c212fd5dc41aafeea7362de6973432d663b14d Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Wed, 14 Feb 2024 13:54:51 +0200 Subject: [PATCH 22/30] Fix string reducer for static schema when an empty segment of a string column is read --- cpp/arcticdb/pipeline/frame_slice_map.hpp | 27 ++++++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/cpp/arcticdb/pipeline/frame_slice_map.hpp b/cpp/arcticdb/pipeline/frame_slice_map.hpp index 7b23eb4ee2..9579694215 100644 --- a/cpp/arcticdb/pipeline/frame_slice_map.hpp +++ b/cpp/arcticdb/pipeline/frame_slice_map.hpp @@ -8,8 +8,6 @@ #pragma once #include -#include -#include namespace arcticdb::pipelines { @@ -35,13 +33,30 @@ struct FrameSliceMap { continue; } - if(!dynamic_schema && !is_sequence_type(field->type().data_type())) { - ARCTICDB_DEBUG(log::version(), "{} not a string type in dynamic schema, skipping", field->name()); - continue; + const entity::DataType row_range_type = field->type().data_type(); + if(!dynamic_schema && !is_sequence_type(row_range_type)) { + // In case we end up with static schema and an empty-typed field we must check the type of the whole column, + // because we could be reading an empty segment of a string column. Example: start with [None], + // then append ["string"]. If we read with row range [0;1) we would end up reading only the empty segment. + // On read the empty type handler will fill the segment with not_a_string() and the reducer must + // run over those values.
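+                // A hypothetical session showing the case described above (library API names are illustrative only):
+                //     lib.write("sym", pd.DataFrame({"col": [None]}))       # column stored with the empty type
+                //     lib.append("sym", pd.DataFrame({"col": ["string"]}))  # column promoted to a string type
+                //     lib.read("sym", row_range=(0, 1))                     # reads only the empty segment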
+ // TODO: This logic won't be needed when we move string handling into separate type handler + if(is_empty_type(row_range_type)) { + const entity::StreamDescriptor& descriptor = context_->descriptor(); + const size_t global_field_idx = descriptor.find_field(field->name()).value(); + const Field& global_field = descriptor.field(global_field_idx); + const entity::DataType global_field_type = global_field.type().data_type(); + if(!is_sequence_type(global_field_type)) { + ARCTICDB_DEBUG(log::version(), "{} not a string type in dynamic schema, skipping", field->name()); + continue; + } + } else { + ARCTICDB_DEBUG(log::version(), "{} not a string type in dynamic schema, skipping", field->name()); + continue; + } } auto& column = columns_[field->name()]; - ContextData data{context_row.index_, field.index}; column.insert(std::make_pair(row_range, data)); } From 224916dd7bb37eb01be9c29c021668ab924aa814 Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Wed, 14 Feb 2024 14:09:21 +0200 Subject: [PATCH 23/30] Add comment on how pandas handles None when dtype is bool --- .../unit/arcticdb/version_store/test_empty_column_type.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py index 58cccc2cb4..e033889d49 100644 --- a/python/tests/unit/arcticdb/version_store/test_empty_column_type.py +++ b/python/tests/unit/arcticdb/version_store/test_empty_column_type.py @@ -69,6 +69,7 @@ def test_float(self, lmdb_version_store_static_and_dynamic, float_dtype): ) def test_bool(self, lmdb_version_store_static_and_dynamic, boolean_dtype): + # Note: if dtype is bool pandas will convert None to False df_non_empty = pd.DataFrame({"col1": np.array([True, False, True], dtype=boolean_dtype)}) lmdb_version_store_static_and_dynamic.append("sym", df_non_empty) expected_result = pd.DataFrame({"col1": np.array([None, None, True, False, True], dtype=boolean_dtype)}) @@ -185,6 +186,7 @@ def test_float(self, lmdb_version_store_static_and_dynamic, float_dtype): ) def test_bool(self, lmdb_version_store_static_and_dynamic, boolean_dtype): + # Note: if dtype is bool pandas will convert None to False df_initial = pd.DataFrame({"col1": np.array([True, False, True], dtype=boolean_dtype)}) df_with_none = pd.DataFrame({"col1": np.array([None, None])}) lmdb_version_store_static_and_dynamic.write("sym", df_initial) @@ -295,6 +297,7 @@ def test_float(self, lmdb_version_store_static_and_dynamic, float_dtype): ) def test_bool(self, lmdb_version_store_static_and_dynamic, boolean_dtype): + # Note: if dtype is bool pandas will convert None to False lmdb_version_store_static_and_dynamic.write( "sym", pd.DataFrame({"col": [True, True, True, True]}, dtype=boolean_dtype, index=self.index()) @@ -418,6 +421,7 @@ def test_float(self, lmdb_version_store_static_and_dynamic, float_dtype): ) def test_bool(self, lmdb_version_store_static_and_dynamic, boolean_dtype): + # Note: if dtype is bool pandas will convert None to False lmdb_version_store_static_and_dynamic.update( "sym", pd.DataFrame({"col": [True, False]}, dtype=boolean_dtype, index=self.update_index()) From a18f2b2139e3c4a71d4d27cdc245ac6fce3748fb Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Wed, 14 Feb 2024 15:00:09 +0200 Subject: [PATCH 24/30] Use std::array for flatten_and_fix_rows instead of boost::span --- cpp/arcticdb/pipeline/write_frame.cpp | 4 +++- cpp/arcticdb/pipeline/write_frame.hpp | 22 +++++++++++++++++----- cpp/arcticdb/version/version_core.cpp | 
4 ++-- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/cpp/arcticdb/pipeline/write_frame.cpp b/cpp/arcticdb/pipeline/write_frame.cpp index d91facbc09..8a274f51b6 100644 --- a/cpp/arcticdb/pipeline/write_frame.cpp +++ b/cpp/arcticdb/pipeline/write_frame.cpp @@ -27,6 +27,8 @@ #include #include #include +#include +#include namespace arcticdb::pipelines { @@ -361,7 +363,7 @@ std::optional rewrite_partial_segment( return SliceAndKey{std::move(new_slice), std::get(std::move(fut_key).get())}; } -std::vector flatten_and_fix_rows(boost::span> groups, size_t& global_count) { +std::vector flatten_and_fix_rows(const std::array, 5>& groups, size_t& global_count) { std::vector output; output.reserve(groups.size()); global_count = 0; diff --git a/cpp/arcticdb/pipeline/write_frame.hpp b/cpp/arcticdb/pipeline/write_frame.hpp index 531761cd7a..1fee3e7654 100644 --- a/cpp/arcticdb/pipeline/write_frame.hpp +++ b/cpp/arcticdb/pipeline/write_frame.hpp @@ -9,7 +9,7 @@ #include -#include + #include #include #include @@ -19,8 +19,6 @@ #include #include -#include - namespace arcticdb::pipelines { using namespace arcticdb::stream; @@ -74,7 +72,21 @@ std::optional rewrite_partial_segment( AffectedSegmentPart affected_part, const std::shared_ptr& store); -// TODO: Use std::span when C++20 is enabled -std::vector flatten_and_fix_rows(boost::span> groups, size_t& global_count); + +/// Used, when updating a segment, to convert all 5 affected groups into a single list of slices +/// The 5 groups are: +/// * Segments before the update range which do not intersect with it and are not affected by +/// the update +/// * Segments before the update range which are intersecting with it and are partially affected +/// by the update. +/// * Segments which are fully contained inside the update range +/// * Segments after the update range which are intersecting with it and are partially affected +/// by the update +/// * Segments after the update range which do not intersect with it and are not affected by the +/// update +std::vector flatten_and_fix_rows( + const std::array, 5>& groups, + size_t& global_count +); } //namespace arcticdb::pipelines \ No newline at end of file diff --git a/cpp/arcticdb/version/version_core.cpp b/cpp/arcticdb/version/version_core.cpp index fe50495606..a575ed9ca5 100644 --- a/cpp/arcticdb/version/version_core.cpp +++ b/cpp/arcticdb/version/version_core.cpp @@ -283,7 +283,7 @@ VersionedItem delete_range_impl( std::move(intersect_before), std::move(intersect_after), strictly_after(orig_filter_range, unaffected_keys)}; - auto flattened_slice_and_keys = flatten_and_fix_rows(boost::span{groups}, row_count); + auto flattened_slice_and_keys = flatten_and_fix_rows(groups, row_count); std::sort(std::begin(flattened_slice_and_keys), std::end(flattened_slice_and_keys)); bool bucketize_dynamic = index_segment_reader.bucketize_dynamic(); @@ -387,7 +387,7 @@ VersionedItem update_impl( std::move(new_slice_and_keys), std::move(intersect_after), strictly_after(orig_filter_range, unaffected_keys)}; - auto flattened_slice_and_keys = flatten_and_fix_rows(boost::span{groups}, row_count); + auto flattened_slice_and_keys = flatten_and_fix_rows(groups, row_count); util::check(unaffected_keys.size() + new_keys_size + (affected_keys.size() * 2) >= flattened_slice_and_keys.size(), "Output size mismatch: {} + {} + (2 * {}) < {}", From 3033303beb3b656b73ab80530ed6f87d47ec7a42 Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Wed, 14 Feb 2024 18:14:11 +0200 Subject: [PATCH 25/30] Refactor string pool reconstruction in 
segment truncation as per PR comment suggestion --- .../column_store/memory_segment_impl.cpp | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/cpp/arcticdb/column_store/memory_segment_impl.cpp b/cpp/arcticdb/column_store/memory_segment_impl.cpp index f483b7b4a9..5102910734 100644 --- a/cpp/arcticdb/column_store/memory_segment_impl.cpp +++ b/cpp/arcticdb/column_store/memory_segment_impl.cpp @@ -441,25 +441,24 @@ std::shared_ptr SegmentInMemoryImpl::truncate( output->set_metadata(std::move(metadata)); } - for(const auto&& [idx, column] : folly::enumerate(columns_)) { + for (const auto&& [idx, column] : folly::enumerate(columns_)) { const DataType column_type = column->type().data_type(); const Field& field = descriptor_->field(idx); + std::shared_ptr truncated_column = Column::truncate(column, start_row, end_row); + if (is_sequence_type(column_type) && reconstruct_string_pool) { - output->add_column(field, num_values, true); - const ChunkedBuffer& source = column->data().buffer(); - ChunkedBuffer& target = output->column(idx).data().buffer(); - for (size_t row = 0; row < num_values; ++row) { - const StringPool::offset_t offset = util::variant_match( - get_string_from_buffer(start_row + row, source, *string_pool_), - [&](std::string_view sv) { return output->string_pool().get(sv).offset(); }, - [](StringPool::offset_t offset) { return offset; } - ); - set_offset_string_at(row, target, offset); + ChunkedBuffer& truncated_buffer = truncated_column->data().buffer(); + for (size_t row = 0; row < truncated_column->row_count(); ++row) { + const entity::position_t offset_val = get_offset_string_at(row, truncated_buffer); + if (is_a_string(offset_val)) { + const std::string_view string = get_string_from_pool(offset_val, *string_pool_); + const StringPool::offset_t new_offset = output->string_pool().get(string).offset(); + set_offset_string_at(row, truncated_buffer, new_offset); + } } - } else { - auto truncated_column = Column::truncate(column, start_row, end_row); - output->add_column(field, truncated_column); } + + output->add_column(field, truncated_column); } output->attach_descriptor(descriptor_); return output; From 0d0c31e04835876511e6b2cf0c6530d9ed2812e8 Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Wed, 14 Feb 2024 19:28:05 +0200 Subject: [PATCH 26/30] Fix compilation errors under linux --- cpp/arcticdb/column_store/memory_segment_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/arcticdb/column_store/memory_segment_impl.cpp b/cpp/arcticdb/column_store/memory_segment_impl.cpp index 5102910734..c2aa86d32e 100644 --- a/cpp/arcticdb/column_store/memory_segment_impl.cpp +++ b/cpp/arcticdb/column_store/memory_segment_impl.cpp @@ -448,7 +448,7 @@ std::shared_ptr SegmentInMemoryImpl::truncate( if (is_sequence_type(column_type) && reconstruct_string_pool) { ChunkedBuffer& truncated_buffer = truncated_column->data().buffer(); - for (size_t row = 0; row < truncated_column->row_count(); ++row) { + for (position_t row = 0; row < truncated_column->row_count(); ++row) { const entity::position_t offset_val = get_offset_string_at(row, truncated_buffer); if (is_a_string(offset_val)) { const std::string_view string = get_string_from_pool(offset_val, *string_pool_); From 5684779a8ad22860648a7305f0ca4f80698a6347 Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Thu, 15 Feb 2024 15:31:43 +0200 Subject: [PATCH 27/30] Prevent creation of sparse map for empty typed columns --- cpp/arcticdb/codec/codec-inl.hpp | 1 + cpp/arcticdb/codec/codec.cpp | 
3 ++- cpp/arcticdb/column_store/column.cpp | 8 ++++---- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cpp/arcticdb/codec/codec-inl.hpp b/cpp/arcticdb/codec/codec-inl.hpp index 1c628cb928..cb170cf6a8 100644 --- a/cpp/arcticdb/codec/codec-inl.hpp +++ b/cpp/arcticdb/codec/codec-inl.hpp @@ -131,6 +131,7 @@ std::size_t decode_ndarray( } if(field.sparse_map_bytes()) { + util::check(!is_empty_type(type_desc_tag.data_type()), "Empty typed columns should not have sparse map"); util::check_magic(data_in); const auto bitmap_size = field.sparse_map_bytes() - util::combined_bit_magic_delimiters_size(); bv = util::deserialize_bytes_to_bitmap(data_in, bitmap_size); diff --git a/cpp/arcticdb/codec/codec.cpp b/cpp/arcticdb/codec/codec.cpp index 395479502c..1aeebc689a 100644 --- a/cpp/arcticdb/codec/codec.cpp +++ b/cpp/arcticdb/codec/codec.cpp @@ -514,7 +514,8 @@ void encode_sparse_map( Buffer& out, std::ptrdiff_t& pos ) { - if (column_data.bit_vector() != nullptr && column_data.bit_vector()->count() > 0) { + if (column_data.bit_vector() != nullptr && column_data.bit_vector()->count() > 0) { + util::check(!is_empty_type(column_data.type().data_type()), "Empty typed columns should not have sparse maps"); ARCTICDB_DEBUG(log::codec(), "Sparse map count = {} pos = {}", column_data.bit_vector()->count(), pos); const size_t sparse_bm_bytes = encode_bitmap(*column_data.bit_vector(), out, pos); util::variant_match(variant_field, [&](auto field) { diff --git a/cpp/arcticdb/column_store/column.cpp b/cpp/arcticdb/column_store/column.cpp index e3e7102973..c8d991efc8 100644 --- a/cpp/arcticdb/column_store/column.cpp +++ b/cpp/arcticdb/column_store/column.cpp @@ -323,6 +323,9 @@ void Column::default_initialize_rows(size_t start_pos, size_t num_rows, bool ens } void Column::set_row_data(size_t row_id) { + if (is_empty_type(type_.data_type())) { + return; + } last_logical_row_ = row_id; const auto last_stored_row = row_count() - 1; if(sparse_map_) { @@ -592,10 +595,7 @@ std::shared_ptr Column::truncate(const std::shared_ptr& column, auto buffer = ::arcticdb::truncate(column->data_.buffer(), start_byte, end_byte); auto res = std::make_shared(column->type(), column->allow_sparse_, std::move(buffer)); if (column->is_sparse()) { - util::BitMagic output_sparse_map = is_empty_type(column->type().data_type()) - ? util::BitMagic{} - : truncate_sparse_map(column->sparse_map(), start_row, end_row); - res->set_sparse_map(std::move(output_sparse_map)); + res->set_sparse_map(truncate_sparse_map(column->sparse_map(), start_row, end_row)); } res->set_row_data(end_row - (start_row + 1)); return res; From 275b62d48a5306d5b562857c9ceac9e8e8673a48 Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Fri, 16 Feb 2024 18:24:18 +0200 Subject: [PATCH 28/30] Use Column::transform when reconstructing the string pool Use template type-deduced functor in Column::transform instead of folly::Function. --- cpp/arcticdb/column_store/column.hpp | 17 +++++++++--- .../column_store/memory_segment_impl.cpp | 27 +++++++++---------- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/cpp/arcticdb/column_store/column.hpp b/cpp/arcticdb/column_store/column.hpp index da08078812..b5698318d8 100644 --- a/cpp/arcticdb/column_store/column.hpp +++ b/cpp/arcticdb/column_store/column.hpp @@ -669,6 +669,9 @@ class Column { * When adding new uses of these static methods, add an internal::check<...>(f.heapAllocatedMemory() == 0,...) * and run all of the tests in CI on all supported platforms. 
We do not want these checks in the released code, * but also do not want any passed in lambdas to be heap allocated. + * + * TODO: Use a type-deduced functor (with enable_if to verify its in/out types) in all functions similar to transform. + * Once C++20 is available, use concepts. */ template @@ -680,13 +683,21 @@ class Column { }); } - template + template < + typename input_tdt, + typename output_tdt, + typename FunctorT, + typename = std::enable_if_t>> static void transform(const Column& input_column, Column& output_column, - folly::Function&& f) { + FunctorT&& f + ) { auto input_data = input_column.data(); auto output_data = output_column.data(); - std::transform(input_data.cbegin(), input_data.cend(), output_data.begin(), std::move(f)); + std::transform(input_data.cbegin(), input_data.cend(), output_data.begin(), std::forward(f)); } template diff --git a/cpp/arcticdb/column_store/memory_segment_impl.cpp b/cpp/arcticdb/column_store/memory_segment_impl.cpp index c2aa86d32e..54342c9ff8 100644 --- a/cpp/arcticdb/column_store/memory_segment_impl.cpp +++ b/cpp/arcticdb/column_store/memory_segment_impl.cpp @@ -442,23 +442,22 @@ std::shared_ptr SegmentInMemoryImpl::truncate( } for (const auto&& [idx, column] : folly::enumerate(columns_)) { - const DataType column_type = column->type().data_type(); + const TypeDescriptor column_type = column->type(); const Field& field = descriptor_->field(idx); std::shared_ptr truncated_column = Column::truncate(column, start_row, end_row); - - if (is_sequence_type(column_type) && reconstruct_string_pool) { - ChunkedBuffer& truncated_buffer = truncated_column->data().buffer(); - for (position_t row = 0; row < truncated_column->row_count(); ++row) { - const entity::position_t offset_val = get_offset_string_at(row, truncated_buffer); - if (is_a_string(offset_val)) { - const std::string_view string = get_string_from_pool(offset_val, *string_pool_); - const StringPool::offset_t new_offset = output->string_pool().get(string).offset(); - set_offset_string_at(row, truncated_buffer, new_offset); - } + column_type.visit_tag([&](auto tag) { + if constexpr (is_sequence_type(decltype(tag)::data_type())) { + Column::transform( + *truncated_column, + *truncated_column, + [this, &output](auto string_pool_offset) { + const std::string_view string = get_string_from_pool(string_pool_offset, *string_pool_); + return output->string_pool().get(string).offset(); + } + ); } - } - - output->add_column(field, truncated_column); + }); + output->add_column(field, std::move(truncated_column)); } output->attach_descriptor(descriptor_); return output; From e6e3e238b1b95edc9f24f442da94228321aab668 Mon Sep 17 00:00:00 2001 From: Vasil Pashov Date: Fri, 16 Feb 2024 19:50:37 +0200 Subject: [PATCH 29/30] Fix MSVC compilation errors --- cpp/arcticdb/storage/azure/azure_mock_client.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/arcticdb/storage/azure/azure_mock_client.cpp b/cpp/arcticdb/storage/azure/azure_mock_client.cpp index 5b0bf3b5ce..ef848f3d48 100644 --- a/cpp/arcticdb/storage/azure/azure_mock_client.cpp +++ b/cpp/arcticdb/storage/azure/azure_mock_client.cpp @@ -52,7 +52,7 @@ std::optional has_failure_trigger(const std auto error_message = fmt::format("Simulated Error, message: #{}_{}", failure_string_for_operation, (int) status_code); return get_exception(error_message, status_code); - } catch (std::exception& e) { + } catch (std::exception&) { return std::nullopt; } } From aeff5a26643b6cae3b1fd83bbac2ee405413a512 Mon Sep 17 00:00:00 2001 From: Vasil Pashov
Date: Mon, 19 Feb 2024 15:01:06 +0200 Subject: [PATCH 30/30] Fix string pool reconstruction by adding checks for is_a_string --- cpp/arcticdb/column_store/memory_segment_impl.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cpp/arcticdb/column_store/memory_segment_impl.cpp b/cpp/arcticdb/column_store/memory_segment_impl.cpp index 54342c9ff8..d834561ad4 100644 --- a/cpp/arcticdb/column_store/memory_segment_impl.cpp +++ b/cpp/arcticdb/column_store/memory_segment_impl.cpp @@ -450,9 +450,12 @@ std::shared_ptr SegmentInMemoryImpl::truncate( Column::transform( *truncated_column, *truncated_column, - [this, &output](auto string_pool_offset) { - const std::string_view string = get_string_from_pool(string_pool_offset, *string_pool_); - return output->string_pool().get(string).offset(); + [this, &output](auto string_pool_offset) -> arcticdb::OffsetString::offset_t { + if (is_a_string(string_pool_offset)) { + const std::string_view string = get_string_from_pool(string_pool_offset, *string_pool_); + return output->string_pool().get(string).offset(); + } + return string_pool_offset; } ); }
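This final patch encodes a general rule for string pool reconstruction: offsets that represent missing values (None/NaN placeholders) are not references into the pool and must pass through unchanged, while genuine offsets are re-interned into the destination pool. A self-contained sketch of that rule, using an illustrative sentinel in place of ArcticDB's actual is_a_string()/not-a-string constants:

    #include <cstdint>
    #include <limits>
    #include <unordered_map>
    #include <vector>

    using offset_t = std::uint64_t;

    // Illustrative sentinel for "not a string"; the real codebase uses dedicated constants.
    constexpr offset_t not_a_string = std::numeric_limits<offset_t>::max();
    constexpr bool is_a_string(offset_t v) { return v != not_a_string; }

    // Remap column offsets into a new pool, leaving sentinel values untouched.
    std::vector<offset_t> remap_offsets(const std::vector<offset_t>& column,
                                        const std::unordered_map<offset_t, offset_t>& old_to_new) {
        std::vector<offset_t> out;
        out.reserve(column.size());
        for (const offset_t v : column) {
            out.push_back(is_a_string(v) ? old_to_new.at(v) : v);
        }
        return out;
    }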