Skip to content

Commit eef80e4

Browse files
e1ijah1paomian
authored and committed
feat(datatypes): implement VectorOp::take (GreptimeTeam#1115)
* feat: add take index method for VectorOp * chore: make clippy happy * chore: make clippy happy * chore: improve the code * chore: improve the code * chore: add take null test * chore: fix clippy
1 parent cfad37a commit eef80e4

File tree

3 files changed

+260
-5
lines changed

3 files changed

+260
-5
lines changed

src/datatypes/src/vectors/constant.rs

+33-4
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,14 @@ use std::any::Any;
1616
use std::fmt;
1717
use std::sync::Arc;
1818

19-
use arrow::array::{Array, ArrayRef};
20-
use snafu::ResultExt;
19+
use arrow::array::{Array, ArrayRef, UInt32Array};
20+
use snafu::{ensure, ResultExt};
2121

2222
use crate::data_type::ConcreteDataType;
23-
use crate::error::{Result, SerializeSnafu};
23+
use crate::error::{self, Result, SerializeSnafu};
2424
use crate::serialize::Serializable;
2525
use crate::value::{Value, ValueRef};
26-
use crate::vectors::{BooleanVector, Helper, Validity, Vector, VectorRef};
26+
use crate::vectors::{BooleanVector, Helper, UInt32Vector, Validity, Vector, VectorRef};
2727

2828
#[derive(Clone)]
2929
pub struct ConstantVector {
@@ -83,6 +83,35 @@ impl ConstantVector {
8383
self.length,
8484
)))
8585
}
86+
87+
pub(crate) fn take_vector(&self, indices: &UInt32Vector) -> Result<VectorRef> {
88+
if indices.is_empty() {
89+
return Ok(self.slice(0, 0));
90+
}
91+
ensure!(
92+
indices.null_count() == 0,
93+
error::UnsupportedOperationSnafu {
94+
op: "taking a null index",
95+
vector_type: self.vector_type_name(),
96+
}
97+
);
98+
99+
let len = self.len();
100+
let arr = indices.to_arrow_array();
101+
let indices_arr = arr.as_any().downcast_ref::<UInt32Array>().unwrap();
102+
if !arrow::compute::min_boolean(
103+
&arrow::compute::lt_scalar(indices_arr, len as u32).unwrap(),
104+
)
105+
.unwrap()
106+
{
107+
panic!("Array index out of bounds, cannot take index out of the length of the array: {len}");
108+
}
109+
110+
Ok(Arc::new(ConstantVector::new(
111+
self.inner().clone(),
112+
indices.len(),
113+
)))
114+
}
86115
}
87116

88117
impl Vector for ConstantVector {

src/datatypes/src/vectors/operations.rs

+24-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ mod cast;
1616
mod filter;
1717
mod find_unique;
1818
mod replicate;
19+
mod take;
1920

2021
use common_base::BitVec;
2122

@@ -24,7 +25,7 @@ use crate::types::LogicalPrimitiveType;
2425
use crate::vectors::constant::ConstantVector;
2526
use crate::vectors::{
2627
BinaryVector, BooleanVector, ConcreteDataType, ListVector, NullVector, PrimitiveVector,
27-
StringVector, Vector, VectorRef,
28+
StringVector, UInt32Vector, Vector, VectorRef,
2829
};
2930

3031
/// Vector compute operations.
@@ -63,6 +64,12 @@ pub trait VectorOp {
6364
///
6465
/// TODO(dennis) describe behaviors in details.
6566
fn cast(&self, to_type: &ConcreteDataType) -> Result<VectorRef>;
67+
68+
/// Take elements from the vector by the given indices.
69+
///
/// Null indices are handled per implementation: `ConstantVector::take_vector` rejects
/// them with an `UnsupportedOperationSnafu` error, while the tests for primitive and
/// boolean vectors expect a null output element for each null index.
///
70+
/// # Panics
71+
/// Panics if an index is out of bounds.
72+
fn take(&self, indices: &UInt32Vector) -> Result<VectorRef>;
6673
}
6774

6875
macro_rules! impl_scalar_vector_op {
@@ -84,6 +91,10 @@ macro_rules! impl_scalar_vector_op {
8491
/// Casts this vector to `to_type` by expanding the `cast_non_constant!` macro on `self`.
fn cast(&self, to_type: &ConcreteDataType) -> Result<VectorRef> {
8592
cast::cast_non_constant!(self, to_type)
8693
}
94+
95+
/// Takes the elements at `indices` by expanding the `take_indices!` macro, which
/// rebuilds the taken Arrow array as a `$VectorType`.
fn take(&self, indices: &UInt32Vector) -> Result<VectorRef> {
96+
take::take_indices!(self, $VectorType, indices)
97+
}
8798
}
8899
)+};
89100
}
@@ -108,6 +119,10 @@ impl<T: LogicalPrimitiveType> VectorOp for PrimitiveVector<T> {
108119
/// Casts this primitive vector to `to_type` by expanding the `cast_non_constant!` macro.
fn cast(&self, to_type: &ConcreteDataType) -> Result<VectorRef> {
109120
cast::cast_non_constant!(self, to_type)
110121
}
122+
123+
/// Takes the elements at `indices` by expanding the `take_indices!` macro, which
/// rebuilds the taken Arrow array as a `PrimitiveVector<T>`.
fn take(&self, indices: &UInt32Vector) -> Result<VectorRef> {
124+
take::take_indices!(self, PrimitiveVector<T>, indices)
125+
}
111126
}
112127

113128
impl VectorOp for NullVector {
@@ -131,6 +146,10 @@ impl VectorOp for NullVector {
131146
}
132147
.fail()
133148
}
149+
150+
/// Takes the elements at `indices` by expanding the `take_indices!` macro, which
/// rebuilds the taken Arrow array as a `NullVector`.
fn take(&self, indices: &UInt32Vector) -> Result<VectorRef> {
151+
take::take_indices!(self, NullVector, indices)
152+
}
134153
}
135154

136155
impl VectorOp for ConstantVector {
@@ -150,4 +169,8 @@ impl VectorOp for ConstantVector {
150169
/// Casts this constant vector to `to_type` by delegating to `self.cast_vector`.
fn cast(&self, to_type: &ConcreteDataType) -> Result<VectorRef> {
151170
self.cast_vector(to_type)
152171
}
172+
173+
/// Takes the elements at `indices` by delegating to `self.take_vector`.
fn take(&self, indices: &UInt32Vector) -> Result<VectorRef> {
174+
self.take_vector(indices)
175+
}
153176
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
// Copyright 2023 Greptime Team
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
/// Expands to an expression that takes the elements of `$vector` at `$indices`.
///
/// Both operands are viewed as Arrow arrays via `as_arrow()` and passed to
/// `arrow::compute::take` with default options. A failure of that call is wrapped with
/// `ArrowComputeSnafu` and propagated with `?`, so the macro must be expanded inside a
/// function returning the crate's `Result`. The taken array is converted back with
/// `<$VectorType>::try_from_arrow_array` and returned as `Ok(Arc::new(..))`.
macro_rules! take_indices {
    ($vector: expr, $VectorType: ty, $indices: ident) => {{
        use snafu::ResultExt;

        let values = $vector.as_arrow();
        let positions = $indices.as_arrow();
        let picked = arrow::compute::take(values, positions, None)
            .context(crate::error::ArrowComputeSnafu)?;
        let rebuilt = <$VectorType>::try_from_arrow_array(picked)?;
        Ok(std::sync::Arc::new(rebuilt))
    }};
}
28+
29+
pub(crate) use take_indices;
30+
31+
#[cfg(test)]
32+
mod tests {
33+
use std::sync::Arc;
34+
35+
use arrow::array::{PrimitiveArray, UInt32Array};
36+
use common_time::{Date, DateTime};
37+
38+
use crate::prelude::VectorRef;
39+
use crate::scalars::ScalarVector;
40+
use crate::timestamp::{
41+
TimestampMicrosecond, TimestampMillisecond, TimestampNanosecond, TimestampSecond,
42+
};
43+
use crate::types::{LogicalPrimitiveType, WrapperType};
44+
use crate::vectors::operations::VectorOp;
45+
use crate::vectors::{
46+
BooleanVector, ConstantVector, Int32Vector, NullVector, PrimitiveVector, StringVector,
47+
UInt32Vector,
48+
};
49+
50+
fn check_take_primitive<T>(
51+
input: Vec<Option<T::Native>>,
52+
indices: Vec<Option<u32>>,
53+
expect: Vec<Option<T::Native>>,
54+
) where
55+
T: LogicalPrimitiveType,
56+
PrimitiveArray<T::ArrowPrimitive>: From<Vec<Option<T::Native>>>,
57+
{
58+
let v = PrimitiveVector::<T>::new(PrimitiveArray::<T::ArrowPrimitive>::from(input));
59+
let indices = UInt32Vector::new(UInt32Array::from(indices));
60+
let output = v.take(&indices).unwrap();
61+
62+
let expected: VectorRef = Arc::new(PrimitiveVector::<T>::new(PrimitiveArray::<
63+
T::ArrowPrimitive,
64+
>::from(expect)));
65+
assert_eq!(expected, output);
66+
}
67+
68+
/// Checks `take` on a time-like vector type.
///
/// A `$VectorType` is built from `0..5` mapped through `$ValueType::$method`; taking
/// indices `[3, 0, 1, 4]` must equal a vector built from those same numbers.
macro_rules! take_time_like_test {
    ($VectorType: ident, $ValueType: ident, $method: ident) => {{
        use $crate::vectors::{$VectorType, VectorRef};

        let source = $VectorType::from_iterator((0..5).map($ValueType::$method));
        let taken = source
            .take(&UInt32Vector::from_slice(&[3, 0, 1, 4]))
            .unwrap();

        // Kept as a separate literal: its integer type is inferred from `$method`,
        // not from the `u32` indices above.
        let wanted_values = [3, 0, 1, 4].into_iter().map($ValueType::$method);
        let wanted: VectorRef = Arc::new($VectorType::from_iterator(wanted_values));
        assert_eq!(wanted, taken);
    }};
}
82+
83+
/// Exercises `take` on nullable primitive vectors and on the date/time vector types.
#[test]
fn test_take_primitive() {
    use crate::types::{Float32Type, Int32Type, UInt32Type};

    // In every case below, a null index and an index that points at a null slot both
    // produce a null output element.

    // Int32
    check_take_primitive::<Int32Type>(
        vec![Some(1), None, Some(3), Some(4), Some(-5)],
        vec![Some(3), None, Some(0), Some(1), Some(4)],
        vec![Some(4), None, Some(1), None, Some(-5)],
    );

    // Float32
    check_take_primitive::<Float32Type>(
        vec![Some(3.24), None, Some(1.34), Some(4.13), Some(5.13)],
        vec![Some(3), None, Some(0), Some(1), Some(4)],
        vec![Some(4.13), None, Some(3.24), None, Some(5.13)],
    );

    // UInt32
    check_take_primitive::<UInt32Type>(
        vec![Some(0), None, Some(2), Some(3), Some(4)],
        vec![Some(4), None, Some(2), Some(1), Some(3)],
        vec![Some(4), None, Some(2), None, Some(3)],
    );

    // Date and date-time vectors.
    take_time_like_test!(DateVector, Date, new);
    take_time_like_test!(DateTimeVector, DateTime, new);

    // Timestamp vectors, one per unit.
    take_time_like_test!(TimestampSecondVector, TimestampSecond, from_native);
    take_time_like_test!(TimestampMillisecondVector, TimestampMillisecond, from_native);
    take_time_like_test!(TimestampMicrosecondVector, TimestampMicrosecond, from_native);
    take_time_like_test!(TimestampNanosecondVector, TimestampNanosecond, from_native);
}
122+
123+
fn check_take_constant(expect_length: usize, input_length: usize, indices: &[u32]) {
124+
let v = ConstantVector::new(Arc::new(Int32Vector::from_slice([111])), input_length);
125+
let indices = UInt32Vector::from_slice(indices);
126+
let out = v.take(&indices).unwrap();
127+
128+
assert!(out.is_const());
129+
assert_eq!(expect_length, out.len());
130+
}
131+
132+
/// In-bounds takes on a constant vector: the output length equals the number of indices.
#[test]
fn test_take_constant() {
    // (expected output length, constant vector length, indices)
    let cases: [(usize, usize, &[u32]); 4] = [
        (2, 5, &[3, 4]),
        (3, 10, &[1, 2, 3]),
        (4, 10, &[1, 5, 3, 6]),
        (5, 10, &[1, 9, 8, 7, 3]),
    ];

    for (expect_length, input_length, indices) in cases {
        check_take_constant(expect_length, input_length, indices);
    }
}
139+
140+
/// An index equal to the constant vector's length must hit the bounds-check panic.
///
/// `expected` pins the panic to the bounds check in `take_vector`, so an unrelated
/// panic (for example an `unwrap` on an unexpected error) cannot make this test pass.
#[test]
#[should_panic(expected = "Array index out of bounds")]
fn test_take_constant_out_of_index() {
    // Index 5 is one past the last valid position of a length-5 vector.
    check_take_constant(2, 5, &[3, 5]);
}
145+
146+
/// Taking indices past the end of a primitive vector must panic.
#[test]
#[should_panic]
fn test_take_out_of_index() {
    let source = Int32Vector::from_slice([1, 2, 3, 4, 5]);
    // Indices 5 and 6 are beyond the five-element source.
    let indices = UInt32Vector::from_slice([1, 5, 6]);
    let _ = source.take(&indices).unwrap();
}
153+
154+
/// Taking three indices from a `NullVector` yields a `NullVector` of length three.
#[test]
fn test_take_null() {
    let positions = UInt32Vector::from_slice([1, 3, 2]);
    let taken = NullVector::new(5).take(&positions).unwrap();

    let wanted: VectorRef = Arc::new(NullVector::new(3));
    assert_eq!(wanted, taken);
}
163+
164+
/// Taking from a `StringVector` returns the strings in index order.
#[test]
fn test_take_scalar() {
    let source = StringVector::from_slice(&["0", "1", "2", "3"]);
    let taken = source
        .take(&UInt32Vector::from_slice([1, 3, 2]))
        .unwrap();

    let wanted: VectorRef = Arc::new(StringVector::from_slice(&["1", "3", "2"]));
    assert_eq!(wanted, taken);
}
173+
174+
/// Covers `take` on boolean vectors, first without nulls and then with nulls in both
/// the values and the indices.
#[test]
fn test_take_bool() {
    // No nulls in either the values or the indices.
    let dense = BooleanVector::from_slice(&[false, true, false, true, false, false, true]);
    let dense_indices = UInt32Vector::from_slice([1, 3, 5, 6]);
    let taken = dense.take(&dense_indices).unwrap();
    let wanted: VectorRef = Arc::new(BooleanVector::from_slice(&[true, true, false, true]));
    assert_eq!(taken, wanted);

    // Index 1 points at a null value and the second index is itself null;
    // both positions are expected to be null in the output.
    let sparse = BooleanVector::from(vec![
        Some(true),
        None,
        Some(false),
        Some(true),
        Some(false),
        Some(false),
        Some(true),
        None,
    ]);
    let sparse_indices = UInt32Vector::from(vec![Some(1), None, Some(3), Some(5), Some(6)]);
    let taken = sparse.take(&sparse_indices).unwrap();
    let wanted: VectorRef = Arc::new(BooleanVector::from(vec![
        None,
        None,
        Some(true),
        Some(false),
        Some(true),
    ]));
    assert_eq!(taken, wanted);
}
203+
}

0 commit comments

Comments
 (0)