Skip to content

Commit

Permalink
feat(data type): implement Array data-type
Browse files Browse the repository at this point in the history
  • Loading branch information
b41sh committed May 7, 2022
1 parent 6985cc0 commit 559e9e0
Show file tree
Hide file tree
Showing 36 changed files with 1,024 additions and 17 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion common/ast/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ common-meta-types = { path = "../meta/types" }
# TODO(andylokandy): Use the version from crates.io once
# https://github.com/brendanzab/codespan/pull/331 is released.
codespan-reporting = { git = "https://github.com/brendanzab/codespan", rev = "c84116f5" }
sqlparser = { git = "https://github.com/datafuse-extras/sqlparser-rs", rev = "fee0056" }
sqlparser = { git = "https://github.com/datafuse-extras/sqlparser-rs", rev = "8db5b12" }

# Crates.io dependencies
async-trait = "0.1.53"
Expand Down
15 changes: 15 additions & 0 deletions common/ast/src/udfs/udf_expr_visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ pub trait UDFExprVisitor: Sized + Send {
Expr::Extract { field, expr } => self.visit_extract(field, expr).await,
Expr::MapAccess { column, keys } => self.visit_map_access(column, keys).await,
Expr::Trim { expr, trim_where } => self.visit_trim(expr, trim_where).await,
Expr::Array(exprs) => self.visit_array(exprs).await,
other => Result::Err(ErrorCode::SyntaxException(format!(
"Unsupported expression: {}, type: {:?}",
expr, other
Expand Down Expand Up @@ -259,4 +260,18 @@ pub trait UDFExprVisitor: Sized + Send {
}
Ok(())
}

/// Visits an array literal by traversing each element expression in order.
///
/// # Errors
///
/// Returns a `SyntaxException` when `exprs` is empty; otherwise propagates the
/// first error produced while traversing an element.
async fn visit_array(&mut self, exprs: &[Expr]) -> Result<()> {
    // An empty array literal is rejected before any traversal happens.
    if exprs.is_empty() {
        return Err(ErrorCode::SyntaxException(
            "Array must have at least one element.",
        ));
    }

    for element in exprs {
        UDFExprTraverser::accept(element, self).await?;
    }
    Ok(())
}
}
106 changes: 106 additions & 0 deletions common/datavalues/src/array_value.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// Copyright 2022 Datafuse Labs.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::cmp::Ordering;
use std::fmt;
use std::fmt::Debug;
use std::fmt::Display;
use std::fmt::Formatter;

use crate::prelude::*;

/// An owned array scalar: an ordered list of [`DataValue`] elements.
#[derive(serde::Serialize, serde::Deserialize, Debug, Default, Clone, PartialEq)]
pub struct ArrayValue {
    /// The elements of the array, in order.
    pub values: Vec<DataValue>,
}

// Marker impl promising that the derived `PartialEq` is a full equivalence relation.
// NOTE(review): this relies on `DataValue` equality being reflexive — confirm.
impl Eq for ArrayValue {}

impl ArrayValue {
pub fn new(values: Vec<DataValue>) -> Self {
Self { values }
}
}

impl From<DataValue> for ArrayValue {
    /// Converts a [`DataValue`] into an [`ArrayValue`].
    ///
    /// A `DataValue::Array` is unwrapped and its elements are reused as-is;
    /// any other value becomes a single-element array holding that value.
    fn from(val: DataValue) -> Self {
        let values = match val {
            DataValue::Array(items) => items,
            scalar => vec![scalar],
        };
        ArrayValue::new(values)
    }
}

impl PartialOrd for ArrayValue {
    /// Compares two arrays by delegating to the ordering of their scalar
    /// references (`as_scalar_ref`).
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        let lhs = self.as_scalar_ref();
        let rhs = other.as_scalar_ref();
        lhs.partial_cmp(&rhs)
    }
}

impl Ord for ArrayValue {
    /// Total ordering derived from [`PartialOrd::partial_cmp`].
    ///
    /// # Panics
    ///
    /// Panics if `partial_cmp` returns `None`.
    fn cmp(&self, other: &Self) -> Ordering {
        let ordering = PartialOrd::partial_cmp(self, other);
        ordering.unwrap()
    }
}

impl Display for ArrayValue {
    /// Renders the array using the `Debug` representation of its element list.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let items = &self.values;
        f.write_fmt(format_args!("{:?}", items))
    }
}

/// A borrowed view of an array scalar.
#[derive(Copy, Clone)]
pub enum ArrayValueRef<'a> {
    /// The array stored at position `idx` of `column`.
    Indexed { column: &'a ArrayColumn, idx: usize },
    /// A reference to an owned [`ArrayValue`].
    ValueRef { val: &'a ArrayValue },
}

impl PartialEq for ArrayValueRef<'_> {
    /// Placeholder equality: every pair of references currently compares as
    /// not equal, including a reference compared with itself.
    ///
    /// NOTE(review): this breaks the reflexivity that `Eq` promises and
    /// disagrees with `partial_cmp`, which reports `Ordering::Equal`; a real
    /// element-wise comparison is still required.
    fn eq(&self, _rhs: &Self) -> bool {
        // TODO(b41sh): implement PartialEq for ArrayValueRef
        false
    }
}

impl PartialOrd for ArrayValueRef<'_> {
    /// Placeholder ordering: every pair of references is reported as equal.
    ///
    /// NOTE(review): sorting or comparing arrays through this impl does not
    /// look at their contents; a real element-wise ordering is still required.
    fn partial_cmp(&self, _rhs: &Self) -> Option<Ordering> {
        // TODO(b41sh): implement PartialOrd for ArrayValueRef
        let placeholder = Ordering::Equal;
        Some(placeholder)
    }
}

// Marker impl; see `PartialEq for ArrayValueRef` for the actual comparison.
impl Eq for ArrayValueRef<'_> {}

impl Ord for ArrayValueRef<'_> {
    /// Total ordering derived from [`PartialOrd::partial_cmp`].
    ///
    /// # Panics
    ///
    /// Panics if `partial_cmp` returns `None`.
    fn cmp(&self, other: &Self) -> Ordering {
        let ordering = PartialOrd::partial_cmp(self, other);
        ordering.unwrap()
    }
}

impl Debug for ArrayValueRef<'_> {
    /// Formats the referenced array for diagnostics.
    ///
    /// * `Indexed`: the value is fetched with `column.get(idx)`. An array value
    ///   is rendered as a bracketed, comma-separated list of its elements
    ///   (e.g. `[1, 2, 3]`); any other value is rendered with its own `Debug`
    ///   output.
    /// * `ValueRef`: delegates to the `Debug` impl of the referenced
    ///   [`ArrayValue`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            ArrayValueRef::Indexed { column, idx } => match column.get(*idx) {
                // Delimit the elements: writing them back to back made distinct
                // arrays such as `[1, 23]` and `[12, 3]` print identically.
                DataValue::Array(vals) => f.debug_list().entries(vals.iter()).finish(),
                // Show unexpected non-array values instead of printing nothing.
                other => write!(f, "{:?}", other),
            },
            ArrayValueRef::ValueRef { val } => write!(f, "{:?}", val),
        }
    }
}
69 changes: 69 additions & 0 deletions common/datavalues/src/columns/array/iterator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright 2022 Datafuse Labs.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::iter::TrustedLen;

use crate::prelude::*;

/// Iterator over the positions of an [`ArrayColumn`], yielding one
/// [`ArrayValueRef::Indexed`] per position.
#[derive(Debug, Clone)]
pub struct ArrayValueIter<'a> {
    // Column being iterated.
    column: &'a ArrayColumn,
    // Position of the next item to yield; starts at 0.
    index: usize,
}

impl<'a> ArrayValueIter<'a> {
/// Creates a new [`ArrayValueIter`]
pub fn new(column: &'a ArrayColumn) -> Self {
Self { column, index: 0 }
}
}

impl<'a> Iterator for ArrayValueIter<'a> {
    type Item = <ArrayValue as Scalar>::RefType<'a>;

    /// Yields a reference to the array at the current position and advances,
    /// or returns `None` once every position of the column has been visited.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.index >= self.column.len() {
            return None;
        }

        let idx = self.index;
        self.index += 1;
        Some(ArrayValueRef::Indexed {
            column: self.column,
            idx,
        })
    }

    /// Reports the exact number of items left as both lower and upper bound.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.column.len() - self.index;
        (remaining, Some(remaining))
    }
}

impl<'a> ExactSizeIterator for ArrayValueIter<'a> {
    /// Number of items that have not been yielded yet.
    fn len(&self) -> usize {
        let total = self.column.len();
        total - self.index
    }
}

// SAFETY: `size_hint` returns `(column.len() - index, Some(column.len() - index))`
// and `next` yields exactly one item per position until `index` reaches
// `column.len()`, so the reported length is exact.
// NOTE(review): assumes `column.len()` cannot change while the column is
// borrowed by the iterator — confirm.
unsafe impl TrustedLen for ArrayValueIter<'_> {}

impl<'a> ArrayColumn {
pub fn iter(&'a self) -> ArrayValueIter<'a> {
ArrayValueIter::new(self)
}
}
22 changes: 22 additions & 0 deletions common/datavalues/src/columns/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ use common_arrow::arrow::types::Index;

use crate::prelude::*;

mod iterator;
mod mutable;

pub use iterator::*;
pub use mutable::*;

type LargeListArray = ListArray<i64>;

#[derive(Clone)]
Expand Down Expand Up @@ -162,6 +168,22 @@ impl Column for ArrayColumn {
}
}

impl ScalarColumn for ArrayColumn {
    type Builder = MutableArrayColumn;
    type OwnedItem = ArrayValue;
    type RefItem<'a> = <ArrayValue as Scalar>::RefType<'a>;
    type Iterator<'a> = ArrayValueIter<'a>;

    /// Returns a lightweight reference to the array at position `idx`.
    ///
    /// No bounds check is performed here; the reference only records the
    /// column and the index.
    #[inline]
    fn get_data(&self, idx: usize) -> Self::RefItem<'_> {
        let column = self;
        ArrayValueRef::Indexed { column, idx }
    }

    /// Returns an iterator over every array in the column, in order.
    fn scalar_iter(&self) -> Self::Iterator<'_> {
        self.iter()
    }
}

impl std::fmt::Debug for ArrayColumn {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut data = Vec::new();
Expand Down
Loading

0 comments on commit 559e9e0

Please sign in to comment.