Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Split compute feature (#634)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Nov 25, 2021
1 parent c3222aa commit e8279a4
Show file tree
Hide file tree
Showing 23 changed files with 178 additions and 154 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ jobs:
with:
use-cross: true
command: check
args: --features=merge_sort,io_ipc,io_csv,io_print,io_json,io_parquet --target ${{ matrix.target }}
args: --features=compute_merge_sort,io_ipc,io_csv,io_print,io_json,io_parquet --target ${{ matrix.target }}

linux-simd-test:
name: SIMD
Expand Down
86 changes: 52 additions & 34 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,10 @@ full = [
"io_avro_compression",
"io_avro_async",
"regex",
"merge_sort",
"compute",
# parses timezones used in timestamp conversions
"chrono-tz"
]
merge_sort = ["itertools"]
io_csv = ["io_csv_read", "io_csv_write"]
io_csv_async = ["io_csv_read_async"]
io_csv_read = ["csv", "lexical-core"]
Expand Down Expand Up @@ -149,7 +147,56 @@ io_avro_async = ["io_avro", "futures"]
io_json_integration = ["io_json", "serde_derive", "hex"]
io_print = ["comfy-table"]
# the compute kernels. Disabling this significantly reduces compile time.
compute = ["strength_reduce", "multiversion", "lexical-core", "ahash"]
compute_aggregate = ["multiversion"]
compute_arithmetics = ["strength_reduce"]
compute_bitwise = []
compute_boolean = []
compute_boolean_kleene = []
compute_cast = ["lexical-core"]
compute_comparison = []
compute_concatenate = []
compute_contains = []
compute_filter = []
compute_hash = ["multiversion", "ahash"]
compute_if_then_else = []
compute_length = []
compute_like = ["regex"]
compute_limit = []
compute_merge_sort = ["itertools", "compute_sort"]
compute_nullif = ["compute_comparison"]
compute_partition = ["compute_sort"]
compute_regex_match = ["regex"]
compute_sort = ["compute_take"]
compute_substring = []
compute_take = []
compute_temporal = []
compute_window = ["compute_concatenate"]
compute = [
"compute_aggregate",
"compute_arithmetics",
"compute_bitwise",
"compute_boolean",
"compute_boolean_kleene",
"compute_cast",
"compute_comparison",
"compute_concatenate",
"compute_contains",
"compute_filter",
"compute_hash",
"compute_if_then_else",
"compute_length",
"compute_like",
"compute_limit",
"compute_merge_sort",
"compute_nullif",
"compute_partition",
"compute_regex_match",
"compute_sort",
"compute_substring",
"compute_take",
"compute_temporal",
"compute_window",
]
# base64 + io_ipc because arrow schemas are stored as base64-encoded ipc format.
io_parquet = ["parquet2", "io_ipc", "base64", "futures"]
benchmarks = ["rand"]
Expand All @@ -159,36 +206,7 @@ simd = ["packed_simd"]
cache_aligned = []

[package.metadata.cargo-all-features]
skip_feature_sets = [
# full is tested independently and is not to be used with other features.
["full"],
# very small scope with no API changes.
["ahash"],
["benchmarks"],
["merge_sort"],
# io are additive APIs and do not interact
["io_csv"],
["io_csv_read"],
["io_csv_write"],
["io_csv_async"],
["io_csv_read_async"],
["io_avro"],
["io_avro_async"],
["io_avro_compression"],
["io_json"],
["io_flight"],
["io_ipc"],
["io_ipc_write_async"],
["io_parquet"],
["io_json_integration"],
# this does not change the public API
["io_parquet_compression"],
["io_ipc_compression"],
# tested in separate
["simd"],
]

skip_optional_dependencies = true
allowlist = ["compute", "compute_sort", "compute_hash", "compute_nullif"]

[[bench]]
name = "take_kernels"
Expand Down Expand Up @@ -251,7 +269,7 @@ name = "bitmap"
harness = false

[[bench]]
name = "concat"
name = "concatenate"
harness = false

[[bench]]
Expand Down
10 changes: 5 additions & 5 deletions benches/concat.rs → benches/concatenate.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
extern crate arrow2;

use arrow2::{
compute::concat,
compute::concatenate::concatenate,
datatypes::DataType,
util::bench_util::{create_boolean_array, create_primitive_array},
};
Expand All @@ -17,15 +17,15 @@ fn add_benchmark(c: &mut Criterion) {

c.bench_function(&format!("int32 concat aligned 2^{}", log2_size), |b| {
b.iter(|| {
let _ = concat::concatenate(&[&array1, &array2]);
let _ = concatenate(&[&array1, &array2]);
})
});

let array1 = create_primitive_array::<i32>(9, DataType::Int32, 0.5);

c.bench_function(&format!("int32 concat unaligned 2^{}", log2_size), |b| {
b.iter(|| {
let _ = concat::concatenate(&[&array1, &array2]);
let _ = concatenate(&[&array1, &array2]);
})
});

Expand All @@ -34,15 +34,15 @@ fn add_benchmark(c: &mut Criterion) {

c.bench_function(&format!("boolean concat aligned 2^{}", log2_size), |b| {
b.iter(|| {
let _ = concat::concatenate(&[&array1, &array2]);
let _ = concatenate(&[&array1, &array2]);
})
});

let array1 = create_boolean_array(9, 0.5, 0.5);

c.bench_function(&format!("boolean concat unaligned 2^{}", log2_size), |b| {
b.iter(|| {
let _ = concat::concatenate(&[&array1, &array2]);
let _ = concatenate(&[&array1, &array2]);
})
});
});
Expand Down
31 changes: 0 additions & 31 deletions src/compute/arithmetics/basic/common.rs

This file was deleted.

2 changes: 1 addition & 1 deletion src/compute/arithmetics/basic/div.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ use std::ops::Div;

use num_traits::{CheckedDiv, NumCast, Zero};

use crate::compute::arithmetics::basic::check_same_len;
use crate::datatypes::DataType;
use crate::{
array::{Array, PrimitiveArray},
compute::{
arithmetics::{ArrayCheckedDiv, ArrayDiv, NativeArithmetics},
arity::{binary, binary_checked, unary, unary_checked},
utils::check_same_len,
},
types::NativeType,
};
Expand Down
3 changes: 0 additions & 3 deletions src/compute/arithmetics/basic/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,6 @@ pub use rem::*;
mod sub;
pub use sub::*;

mod common;
pub(crate) use common::*;

use std::ops::Neg;

use num_traits::{CheckedNeg, WrappingNeg};
Expand Down
3 changes: 1 addition & 2 deletions src/compute/arithmetics/decimal/add.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
//! Defines the addition arithmetic kernels for [`PrimitiveArray`] representing decimals.
use crate::compute::arithmetics::basic::check_same_len;
use crate::{
array::{Array, PrimitiveArray},
buffer::Buffer,
compute::{
arithmetics::{ArrayAdd, ArrayCheckedAdd, ArraySaturatingAdd},
arity::{binary, binary_checked},
utils::combine_validities,
utils::{check_same_len, combine_validities},
},
};
use crate::{
Expand Down
3 changes: 1 addition & 2 deletions src/compute/arithmetics/decimal/div.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
//! Defines the division arithmetic kernels for Decimal
//! `PrimitiveArrays`.

use crate::compute::arithmetics::basic::check_same_len;
use crate::{
array::{Array, PrimitiveArray},
buffer::Buffer,
compute::{
arithmetics::{ArrayCheckedDiv, ArrayDiv},
arity::{binary, binary_checked},
utils::combine_validities,
utils::{check_same_len, combine_validities},
},
datatypes::DataType,
error::{ArrowError, Result},
Expand Down
3 changes: 1 addition & 2 deletions src/compute/arithmetics/decimal/mul.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
//! Defines the multiplication arithmetic kernels for Decimal
//! `PrimitiveArrays`.

use crate::compute::arithmetics::basic::check_same_len;
use crate::{
array::{Array, PrimitiveArray},
buffer::Buffer,
compute::{
arithmetics::{ArrayCheckedMul, ArrayMul, ArraySaturatingMul},
arity::{binary, binary_checked},
utils::combine_validities,
utils::{check_same_len, combine_validities},
},
datatypes::DataType,
error::{ArrowError, Result},
Expand Down
3 changes: 1 addition & 2 deletions src/compute/arithmetics/decimal/sub.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
//! Defines the subtract arithmetic kernels for Decimal `PrimitiveArrays`.

use crate::compute::arithmetics::basic::check_same_len;
use crate::{
array::{Array, PrimitiveArray},
buffer::Buffer,
compute::{
arithmetics::{ArrayCheckedSub, ArraySaturatingSub, ArraySub},
arity::{binary, binary_checked},
utils::combine_validities,
utils::{check_same_len, combine_validities},
},
datatypes::DataType,
error::{ArrowError, Result},
Expand Down
3 changes: 1 addition & 2 deletions src/compute/arity.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
//! Defines kernels suitable to perform operations to primitive arrays.

use super::utils::combine_validities;
use crate::compute::arithmetics::basic::check_same_len;
use super::utils::{check_same_len, combine_validities};
use crate::{
array::PrimitiveArray,
bitmap::{Bitmap, MutableBitmap},
Expand Down
23 changes: 3 additions & 20 deletions src/compute/concat.rs → src/compute/concatenate.rs
Original file line number Diff line number Diff line change
@@ -1,31 +1,14 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Contains the concatenate kernel
//!
//! Example:
//!
//! ```
//! use arrow2::array::Utf8Array;
//! use arrow2::compute::concat::concatenate;
//! use arrow2::compute::concatenate::concatenate;
//!
//! let arr = concatenate(&[
//! &Utf8Array::<i32>::from_slice(vec!["hello", "world"]),
//! &Utf8Array::<i32>::from_slice(vec!["!"]),
//! &Utf8Array::<i32>::from_slice(["hello", "world"]),
//! &Utf8Array::<i32>::from_slice(["!"]),
//! ]).unwrap();
//! assert_eq!(arr.len(), 3);
//! ```
Expand Down
4 changes: 1 addition & 3 deletions src/compute/contains.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
//! Declares the [`contains`] operator

use crate::types::NativeType;
use crate::{
array::{Array, BinaryArray, BooleanArray, ListArray, Offset, PrimitiveArray, Utf8Array},
bitmap::Bitmap,
};
use crate::{
datatypes::DataType,
error::{ArrowError, Result},
types::NativeType,
};

use super::utils::combine_validities;
Expand Down
4 changes: 2 additions & 2 deletions src/compute/if_then_else.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Contains the operator [`if_then_else`].
use crate::array::growable;
use crate::array::{growable, Array, BooleanArray};
use crate::bitmap::utils::SlicesIterator;
use crate::error::{ArrowError, Result};
use crate::{array::*, bitmap::utils::SlicesIterator};

/// Returns the values from `lhs` if the predicate is `true` or from the `rhs` if the predicate is `false`.
/// Returns `None` if the predicate is `None`.
Expand Down
5 changes: 3 additions & 2 deletions src/compute/like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@ use std::collections::HashMap;
use regex::bytes::Regex as BytesRegex;
use regex::Regex;

use crate::datatypes::DataType;
use crate::{array::*, bitmap::Bitmap};
use crate::{
array::{BinaryArray, BooleanArray, Offset, Utf8Array},
bitmap::Bitmap,
compute::utils::combine_validities,
datatypes::DataType,
error::{ArrowError, Result},
};

Expand Down
Loading

0 comments on commit e8279a4

Please sign in to comment.