@@ -26,24 +26,40 @@ use parquet::file::statistics::Statistics;
26
26
/// Column statistics for boolean values.
pub struct BoolType {
    /// Smallest value recorded for the column.
    pub min: bool,
    /// Largest value recorded for the column.
    pub max: bool,
    /// Distinct-value count as reported by the source statistics.
    pub distinct_count: u64,
    /// Number of null entries as reported by the source statistics.
    pub null_count: u64,
    /// Whether `max` is an exact value.
    pub is_max_value_exact: bool,
    /// Whether `min` is an exact value.
    pub is_min_value_exact: bool,
}
30
34
31
35
#[ derive( Debug , Clone , serde:: Serialize , serde:: Deserialize ) ]
32
36
pub struct Float64Type {
33
37
pub min : f64 ,
34
38
pub max : f64 ,
39
+ pub distinct_count : u64 ,
40
+ pub null_count : u64 ,
41
+ pub is_max_value_exact : bool ,
42
+ pub is_min_value_exact : bool ,
35
43
}
36
44
37
45
#[ derive( Debug , Clone , serde:: Serialize , serde:: Deserialize ) ]
38
46
pub struct Int64Type {
39
47
pub min : i64 ,
40
48
pub max : i64 ,
49
+ pub distinct_count : u64 ,
50
+ pub null_count : u64 ,
51
+ pub is_max_value_exact : bool ,
52
+ pub is_min_value_exact : bool ,
41
53
}
42
54
43
55
#[ derive( Debug , Clone , serde:: Serialize , serde:: Deserialize ) ]
44
56
pub struct Utf8Type {
45
57
pub min : String ,
46
58
pub max : String ,
59
+ pub distinct_count : u64 ,
60
+ pub null_count : u64 ,
61
+ pub is_max_value_exact : bool ,
62
+ pub is_min_value_exact : bool ,
47
63
}
48
64
49
65
// Typed statistics are a typed variant of statistics
@@ -64,24 +80,40 @@ impl TypedStatistics {
64
80
TypedStatistics :: Bool ( BoolType {
65
81
min : min ( this. min , other. min ) ,
66
82
max : max ( this. max , other. max ) ,
83
+ distinct_count : this. distinct_count + other. distinct_count ,
84
+ null_count : this. null_count + other. null_count ,
85
+ is_max_value_exact : this. is_max_value_exact && other. is_max_value_exact ,
86
+ is_min_value_exact : this. is_min_value_exact && other. is_min_value_exact ,
67
87
} )
68
88
}
69
89
( TypedStatistics :: Float ( this) , TypedStatistics :: Float ( other) ) => {
70
90
TypedStatistics :: Float ( Float64Type {
71
91
min : this. min . min ( other. min ) ,
72
92
max : this. max . max ( other. max ) ,
93
+ distinct_count : this. distinct_count + other. distinct_count ,
94
+ null_count : this. null_count + other. null_count ,
95
+ is_max_value_exact : this. is_max_value_exact && other. is_max_value_exact ,
96
+ is_min_value_exact : this. is_min_value_exact && other. is_min_value_exact ,
73
97
} )
74
98
}
75
99
( TypedStatistics :: Int ( this) , TypedStatistics :: Int ( other) ) => {
76
100
TypedStatistics :: Int ( Int64Type {
77
101
min : min ( this. min , other. min ) ,
78
102
max : max ( this. max , other. max ) ,
103
+ distinct_count : this. distinct_count + other. distinct_count ,
104
+ null_count : this. null_count + other. null_count ,
105
+ is_max_value_exact : this. is_max_value_exact && other. is_max_value_exact ,
106
+ is_min_value_exact : this. is_min_value_exact && other. is_min_value_exact ,
79
107
} )
80
108
}
81
109
( TypedStatistics :: String ( this) , TypedStatistics :: String ( other) ) => {
82
110
TypedStatistics :: String ( Utf8Type {
83
111
min : min ( this. min , other. min ) ,
84
112
max : max ( this. max , other. max ) ,
113
+ distinct_count : this. distinct_count + other. distinct_count ,
114
+ null_count : this. null_count + other. null_count ,
115
+ is_max_value_exact : this. is_max_value_exact && other. is_max_value_exact ,
116
+ is_min_value_exact : this. is_min_value_exact && other. is_min_value_exact ,
85
117
} )
86
118
}
87
119
_ => panic ! ( "Cannot update wrong types" ) ,
@@ -146,26 +178,74 @@ impl TryFrom<&Statistics> for TypedStatistics {
146
178
Statistics :: Boolean ( stats) => TypedStatistics :: Bool ( BoolType {
147
179
min : * stats. min_opt ( ) . expect ( "Boolean stats min not set" ) ,
148
180
max : * stats. max_opt ( ) . expect ( "Boolean stats max not set" ) ,
181
+ distinct_count : stats
182
+ . distinct_count ( )
183
+ . expect ( "Boolean stats distinct count not set" ) ,
184
+ null_count : stats
185
+ . null_count_opt ( )
186
+ . expect ( "Boolean stats null count not set" ) ,
187
+ is_max_value_exact : stats. max_is_exact ( ) ,
188
+ is_min_value_exact : stats. min_is_exact ( ) ,
149
189
} ) ,
150
190
Statistics::Int32(stats) => TypedStatistics::Int(Int64Type {
    // 32-bit bounds are widened to i64 so every integer column shares Int64Type.
    min: *stats.min_opt().expect("Int32 stats min not set") as i64,
    max: *stats.max_opt().expect("Int32 stats max not set") as i64,
    // Panic messages name the actual physical type (previously copy-pasted as "Boolean").
    // NOTE(review): this panics when the writer omitted the distinct count — confirm
    // that every input file is guaranteed to carry it.
    distinct_count: stats
        .distinct_count()
        .expect("Int32 stats distinct count not set"),
    null_count: stats
        .null_count_opt()
        .expect("Int32 stats null count not set"),
    is_max_value_exact: stats.max_is_exact(),
    is_min_value_exact: stats.min_is_exact(),
}),
154
202
Statistics::Int64(stats) => TypedStatistics::Int(Int64Type {
    min: *stats.min_opt().expect("Int64 stats min not set"),
    max: *stats.max_opt().expect("Int64 stats max not set"),
    // Panic messages name the actual physical type (previously copy-pasted as "Boolean").
    // NOTE(review): this panics when the writer omitted the distinct count — confirm
    // that every input file is guaranteed to carry it.
    distinct_count: stats
        .distinct_count()
        .expect("Int64 stats distinct count not set"),
    null_count: stats
        .null_count_opt()
        .expect("Int64 stats null count not set"),
    is_max_value_exact: stats.max_is_exact(),
    is_min_value_exact: stats.min_is_exact(),
}),
158
214
Statistics::Int96(stats) => TypedStatistics::Int(Int64Type {
    // Int96 bounds are converted with `to_i64()` so they fit the shared Int64Type.
    min: stats.min_opt().expect("Int96 stats min not set").to_i64(),
    max: stats.max_opt().expect("Int96 stats max not set").to_i64(),
    // Panic messages name the actual physical type (previously copy-pasted as "Boolean").
    // NOTE(review): this panics when the writer omitted the distinct count — confirm
    // that every input file is guaranteed to carry it.
    distinct_count: stats
        .distinct_count()
        .expect("Int96 stats distinct count not set"),
    null_count: stats
        .null_count_opt()
        .expect("Int96 stats null count not set"),
    is_max_value_exact: stats.max_is_exact(),
    is_min_value_exact: stats.min_is_exact(),
}),
162
226
Statistics::Float(stats) => TypedStatistics::Float(Float64Type {
    // 32-bit float bounds are widened to f64 so every float column shares Float64Type.
    min: *stats.min_opt().expect("Float32 stats min not set") as f64,
    max: *stats.max_opt().expect("Float32 stats max not set") as f64,
    // Panic messages name the actual physical type (previously copy-pasted as "Boolean").
    // NOTE(review): this panics when the writer omitted the distinct count — confirm
    // that every input file is guaranteed to carry it.
    distinct_count: stats
        .distinct_count()
        .expect("Float32 stats distinct count not set"),
    null_count: stats
        .null_count_opt()
        .expect("Float32 stats null count not set"),
    is_max_value_exact: stats.max_is_exact(),
    is_min_value_exact: stats.min_is_exact(),
}),
166
238
Statistics::Double(stats) => TypedStatistics::Float(Float64Type {
    min: *stats.min_opt().expect("Float64 stats min not set"),
    max: *stats.max_opt().expect("Float64 stats max not set"),
    // Panic messages name the actual physical type (previously copy-pasted as "Boolean").
    // NOTE(review): this panics when the writer omitted the distinct count — confirm
    // that every input file is guaranteed to carry it.
    distinct_count: stats
        .distinct_count()
        .expect("Float64 stats distinct count not set"),
    null_count: stats
        .null_count_opt()
        .expect("Float64 stats null count not set"),
    is_max_value_exact: stats.max_is_exact(),
    is_min_value_exact: stats.min_is_exact(),
}),
170
250
Statistics :: ByteArray ( stats) => TypedStatistics :: String ( Utf8Type {
171
251
min : stats
@@ -178,6 +258,14 @@ impl TryFrom<&Statistics> for TypedStatistics {
178
258
. expect ( "Utf8 stats max not set" )
179
259
. as_utf8 ( ) ?
180
260
. to_owned ( ) ,
261
+ distinct_count : stats
262
+ . distinct_count ( )
263
+ . expect ( "Boolean stats distinct count not set" ) ,
264
+ null_count : stats
265
+ . null_count_opt ( )
266
+ . expect ( "Boolean stats null count not set" ) ,
267
+ is_max_value_exact : stats. max_is_exact ( ) ,
268
+ is_min_value_exact : stats. min_is_exact ( ) ,
181
269
} ) ,
182
270
Statistics :: FixedLenByteArray ( stats) => TypedStatistics :: String ( Utf8Type {
183
271
min : stats
@@ -190,6 +278,14 @@ impl TryFrom<&Statistics> for TypedStatistics {
190
278
. expect ( "Utf8 stats max not set" )
191
279
. as_utf8 ( ) ?
192
280
. to_owned ( ) ,
281
+ distinct_count : stats
282
+ . distinct_count ( )
283
+ . expect ( "Boolean stats distinct count not set" ) ,
284
+ null_count : stats
285
+ . null_count_opt ( )
286
+ . expect ( "Boolean stats null count not set" ) ,
287
+ is_max_value_exact : stats. max_is_exact ( ) ,
288
+ is_min_value_exact : stats. min_is_exact ( ) ,
193
289
} ) ,
194
290
} ;
195
291
0 commit comments