From aa9e6fc04237ed8e4ec2607638eb21acb4ced859 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 24 Oct 2025 11:56:43 +0100 Subject: [PATCH 1/5] feat[layout]: dict layout known code nullability Signed-off-by: Joe Isaacs --- vortex-layout/src/layouts/dict/mod.rs | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/vortex-layout/src/layouts/dict/mod.rs b/vortex-layout/src/layouts/dict/mod.rs index 41d72bf1be1..e9e8aaf2ffc 100644 --- a/vortex-layout/src/layouts/dict/mod.rs +++ b/vortex-layout/src/layouts/dict/mod.rs @@ -41,9 +41,10 @@ impl VTable for DictVTable { } fn metadata(layout: &Self::Layout) -> Self::Metadata { - ProstMetadata(DictLayoutMetadata::new( - PType::try_from(layout.codes.dtype()).vortex_expect("ptype"), - )) + let mut metadata = + DictLayoutMetadata::new(PType::try_from(layout.codes.dtype()).vortex_expect("ptype")); + metadata.is_nullable_codes = Some(layout.codes.dtype().is_nullable()); + ProstMetadata(metadata) } fn segment_ids(_layout: &Self::Layout) -> Vec { @@ -92,10 +93,13 @@ impl VTable for DictVTable { _ctx: ArrayContext, ) -> VortexResult { let values = children.child(0, dtype)?; - let codes = children.child( - 1, - &DType::Primitive(metadata.codes_ptype(), dtype.nullability()), - )?; + let codes_nullable = metadata + .is_nullable_codes + // The old behaviour (without `is_nullable_codes` metadata) used the nullability + // of the values (and whole array). + .unwrap_or_else(|| dtype.is_nullable()) + .into(); + let codes = children.child(1, &DType::Primitive(metadata.codes_ptype(), codes_nullable))?; Ok(DictLayout { values, codes }) } } @@ -120,6 +124,9 @@ pub struct DictLayoutMetadata { #[prost(enumeration = "PType", tag = "1")] // i32 is required for proto, use the generated getter to read this field. 
codes_ptype: i32, + // nullable codes are optional since they were added after stabilisation + #[prost(optional, bool, tag = "2")] + is_nullable_codes: Option<bool>, } impl DictLayoutMetadata { From ebeef82e6d82c2654638ec77e9d164cfc4553951 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 24 Oct 2025 12:02:11 +0100 Subject: [PATCH 2/5] feat[layout]: dict layout known code nullability Signed-off-by: Joe Isaacs --- encodings/dict/src/serde.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/encodings/dict/src/serde.rs b/encodings/dict/src/serde.rs index eb33131fcfa..d3d3489ff2f 100644 --- a/encodings/dict/src/serde.rs +++ b/encodings/dict/src/serde.rs @@ -54,11 +54,12 @@ impl SerdeVTable for DictVTable { children.len() ) } - let codes_nullable: Nullability = metadata + let codes_nullable = metadata .is_nullable_codes - // The old behaviour of (without `is_nullable_codes` metadata) used the nullability - // of the values (and whole array). - .unwrap_or_else(|| dtype.is_nullable()) + .map(|nullable| nullable.into()) + // If no `is_nullable_codes` metadata use the nullability of the values + // (and whole array) as before. 
+ .unwrap_or_else(|| dtype.nullability()) .into(); let codes_dtype = DType::Primitive(metadata.codes_ptype(), codes_nullable); let codes = children.get(0, &codes_dtype, len)?; From 3bd34c8430e248e699a8abbabdfbcf6061666ace Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 24 Oct 2025 12:03:44 +0100 Subject: [PATCH 3/5] feat[layout]: dict layout known code nullability Signed-off-by: Joe Isaacs --- encodings/dict/src/serde.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/encodings/dict/src/serde.rs b/encodings/dict/src/serde.rs index d3d3489ff2f..5a9fc7c4bbd 100644 --- a/encodings/dict/src/serde.rs +++ b/encodings/dict/src/serde.rs @@ -56,7 +56,7 @@ impl SerdeVTable for DictVTable { } let codes_nullable = metadata .is_nullable_codes - .map(|nullable| nullable.into()) + .map(Nullability::from) // If no `is_nullable_codes` metadata use the nullability of the values // (and whole array) as before. .unwrap_or_else(|| dtype.nullability()) From 6bc224b2aef370cc884157d193707acc75c6e047 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 24 Oct 2025 12:14:58 +0100 Subject: [PATCH 4/5] feat[layout]: dict layout known code nullability Signed-off-by: Joe Isaacs --- encodings/dict/src/serde.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/encodings/dict/src/serde.rs b/encodings/dict/src/serde.rs index 5a9fc7c4bbd..d777826ba6a 100644 --- a/encodings/dict/src/serde.rs +++ b/encodings/dict/src/serde.rs @@ -59,8 +59,7 @@ impl SerdeVTable for DictVTable { .map(Nullability::from) // If no `is_nullable_codes` metadata use the nullability of the values // (and whole array) as before. 
- .unwrap_or_else(|| dtype.nullability()) - .into(); + .unwrap_or_else(|| dtype.nullability()); let codes_dtype = DType::Primitive(metadata.codes_ptype(), codes_nullable); let codes = children.get(0, &codes_dtype, len)?; let values = children.get(1, dtype, metadata.values_len as usize)?; From b7b87e758ba7f63e620d424cf345e8be8c1e5ce8 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 24 Oct 2025 12:16:28 +0100 Subject: [PATCH 5/5] feat[layout]: dict layout known code nullability Signed-off-by: Joe Isaacs --- vortex-layout/src/layouts/dict/mod.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vortex-layout/src/layouts/dict/mod.rs b/vortex-layout/src/layouts/dict/mod.rs index e9e8aaf2ffc..d05d72cce91 100644 --- a/vortex-layout/src/layouts/dict/mod.rs +++ b/vortex-layout/src/layouts/dict/mod.rs @@ -8,7 +8,7 @@ use std::sync::Arc; use reader::DictReader; use vortex_array::{ArrayContext, DeserializeMetadata, ProstMetadata}; -use vortex_dtype::{DType, PType}; +use vortex_dtype::{DType, Nullability, PType}; use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_panic}; use crate::children::LayoutChildren; @@ -95,10 +95,11 @@ impl VTable for DictVTable { let values = children.child(0, dtype)?; let codes_nullable = metadata .is_nullable_codes + .map(Nullability::from) // The old behaviour (without `is_nullable_codes` metadata) used the nullability // of the values (and whole array). - .unwrap_or_else(|| dtype.is_nullable()) - .into(); + // see [`SerdeVTable::build`]. + .unwrap_or_else(|| dtype.nullability()); let codes = children.child(1, &DType::Primitive(metadata.codes_ptype(), codes_nullable))?; Ok(DictLayout { values, codes }) }