1
+ use std:: ops:: Deref ;
2
+
1
3
// Licensed to the Apache Software Foundation (ASF) under one
2
4
// or more contributor license agreements. See the NOTICE file
3
5
// distributed with this work for additional information
@@ -29,6 +31,65 @@ mod list;
29
31
mod metadata;
30
32
mod object;
31
33
34
/// Maximum byte length of a Variant short string (0x3F == 63). `ShortString::try_new` rejects longer values and `From<&str> for Variant` stores them as `Variant::String` instead.
+ const MAX_SHORT_STRING_BYTES : usize = 0x3F ;
35
+
36
/// A Variant short string: a borrowed `&str` that is at most 63 bytes long.
///
/// This is a zero-cost newtype over `&str`. Build one with `ShortString::try_new`
/// (or the `TryFrom<&str>` impl), which checks the length limit.
///
/// Equality and hashing delegate to the wrapped string, so a `ShortString` can
/// be used as a key in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct ShortString<'a>(pub(crate) &'a str);
42
+
43
+ impl < ' a > ShortString < ' a > {
44
+ /// Attempts to interpret `value` as a variant short string value.
45
+ ///
46
+ /// # Validation
47
+ ///
48
+ /// This constructor verifies that `value` is shorter than or equal to `MAX_SHORT_STRING_BYTES`
49
+ pub fn try_new ( value : & ' a str ) -> Result < Self , ArrowError > {
50
+ if value. len ( ) > MAX_SHORT_STRING_BYTES {
51
+ return Err ( ArrowError :: InvalidArgumentError ( format ! (
52
+ "value is larger than {MAX_SHORT_STRING_BYTES} bytes"
53
+ ) ) ) ;
54
+ }
55
+
56
+ Ok ( Self ( value) )
57
+ }
58
+
59
+ /// Returns the underlying Variant short string as a &str
60
+ pub fn as_str ( & self ) -> & ' a str {
61
+ self . 0
62
+ }
63
+ }
64
+
65
+ impl < ' a > From < ShortString < ' a > > for & ' a str {
66
+ fn from ( value : ShortString < ' a > ) -> Self {
67
+ value. 0
68
+ }
69
+ }
70
+
71
+ impl < ' a > TryFrom < & ' a str > for ShortString < ' a > {
72
+ type Error = ArrowError ;
73
+
74
+ fn try_from ( value : & ' a str ) -> Result < Self , Self :: Error > {
75
+ Self :: try_new ( value)
76
+ }
77
+ }
78
+
79
+ impl < ' a > AsRef < str > for ShortString < ' a > {
80
+ fn as_ref ( & self ) -> & str {
81
+ self . 0
82
+ }
83
+ }
84
+
85
+ impl < ' a > Deref for ShortString < ' a > {
86
+ type Target = str ;
87
+
88
+ fn deref ( & self ) -> & Self :: Target {
89
+ self . 0
90
+ }
91
+ }
92
+
32
93
/// Represents a [Parquet Variant]
33
94
///
34
95
/// The lifetimes `'m` and `'v` are for metadata and value buffers, respectively.
@@ -85,7 +146,7 @@ mod object;
85
146
///
86
147
/// ## Creating `Variant` from Rust Types
87
148
/// ```
88
- /// # use parquet_variant::Variant;
149
+ /// use parquet_variant::Variant;
89
150
/// // variants can be directly constructed
90
151
/// let variant = Variant::Int32(123);
91
152
/// // or constructed via `From` impls
@@ -98,7 +159,7 @@ mod object;
98
159
/// let value = [0x09, 0x48, 0x49];
99
160
/// // parse the header metadata
100
161
/// assert_eq!(
101
- /// Variant::ShortString ("HI"),
162
+ /// Variant::from ("HI"),
102
163
/// Variant::try_new(&metadata, &value).unwrap()
103
164
/// );
104
165
/// ```
@@ -152,7 +213,7 @@ pub enum Variant<'m, 'v> {
152
213
/// Primitive (type_id=1): STRING
153
214
String ( & ' v str ) ,
154
215
/// Short String (type_id=2): STRING
155
- ShortString ( & ' v str ) ,
216
+ ShortString ( ShortString < ' v > ) ,
156
217
// need both metadata & value
157
218
/// Object (type_id=3): N/A
158
219
Object ( VariantObject < ' m , ' v > ) ,
@@ -165,12 +226,12 @@ impl<'m, 'v> Variant<'m, 'v> {
165
226
///
166
227
/// # Example
167
228
/// ```
168
- /// # use parquet_variant::{Variant, VariantMetadata};
229
+ /// use parquet_variant::{Variant, VariantMetadata};
169
230
/// let metadata = [0x01, 0x00, 0x00];
170
231
/// let value = [0x09, 0x48, 0x49];
171
232
/// // parse the header metadata
172
233
/// assert_eq!(
173
- /// Variant::ShortString ("HI"),
234
+ /// Variant::from ("HI"),
174
235
/// Variant::try_new(&metadata, &value).unwrap()
175
236
/// );
176
237
/// ```
@@ -189,7 +250,7 @@ impl<'m, 'v> Variant<'m, 'v> {
189
250
/// // parse the header metadata first
190
251
/// let metadata = VariantMetadata::try_new(&metadata).unwrap();
191
252
/// assert_eq!(
192
- /// Variant::ShortString ("HI"),
253
+ /// Variant::from ("HI"),
193
254
/// Variant::try_new_with_metadata(metadata, &value).unwrap()
194
255
/// );
195
256
/// ```
@@ -432,7 +493,7 @@ impl<'m, 'v> Variant<'m, 'v> {
432
493
///
433
494
/// // you can extract a string from string variants
434
495
/// let s = "hello!";
435
- /// let v1 = Variant::ShortString (s);
496
+ /// let v1 = Variant::from (s);
436
497
/// assert_eq!(v1.as_string(), Some(s));
437
498
///
438
499
/// // but not from other variants
@@ -441,7 +502,7 @@ impl<'m, 'v> Variant<'m, 'v> {
441
502
/// ```
442
503
pub fn as_string ( & ' v self ) -> Option < & ' v str > {
443
504
match self {
444
- Variant :: String ( s) | Variant :: ShortString ( s ) => Some ( s) ,
505
+ Variant :: String ( s) | Variant :: ShortString ( ShortString ( s ) ) => Some ( s) ,
445
506
_ => None ,
446
507
}
447
508
}
@@ -861,10 +922,25 @@ impl<'v> From<&'v [u8]> for Variant<'_, 'v> {
861
922
862
923
impl < ' v > From < & ' v str > for Variant < ' _ , ' v > {
863
924
fn from ( value : & ' v str ) -> Self {
864
- if value. len ( ) < 64 {
865
- Variant :: ShortString ( value)
866
- } else {
925
+ if value. len ( ) > MAX_SHORT_STRING_BYTES {
867
926
Variant :: String ( value)
927
+ } else {
928
+ Variant :: ShortString ( ShortString ( value) )
868
929
}
869
930
}
870
931
}
932
+
933
#[cfg(test)]
mod tests {
    use super::*;

    /// `ShortString::try_new` accepts anything up to and including the limit
    /// and rejects the first length past it.
    #[test]
    fn test_construct_short_string() {
        let short_string = ShortString::try_new("norm").expect("should fit in short string");
        assert_eq!(short_string.as_str(), "norm");

        // An empty string is the smallest valid short string.
        let empty = ShortString::try_new("").expect("empty string should fit in short string");
        assert!(empty.as_str().is_empty());

        // The limit is inclusive: exactly MAX_SHORT_STRING_BYTES bytes must be accepted.
        let max_string = "a".repeat(MAX_SHORT_STRING_BYTES);
        let at_limit = ShortString::try_new(&max_string).expect("should fit in short string");
        assert_eq!(at_limit.as_str(), max_string.as_str());

        // One byte past the limit must be rejected.
        let long_string = "a".repeat(MAX_SHORT_STRING_BYTES + 1);
        let res = ShortString::try_new(&long_string);
        assert!(res.is_err());
    }

    /// `Variant::from(&str)` must switch representation at the same boundary
    /// that `ShortString::try_new` enforces.
    #[test]
    fn test_variant_from_str_boundary() {
        let max_string = "a".repeat(MAX_SHORT_STRING_BYTES);
        assert!(matches!(
            Variant::from(max_string.as_str()),
            Variant::ShortString(_)
        ));

        let long_string = "a".repeat(MAX_SHORT_STRING_BYTES + 1);
        assert!(matches!(
            Variant::from(long_string.as_str()),
            Variant::String(_)
        ));
    }
}
0 commit comments