Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into parquet-file-arro…
Browse files Browse the repository at this point in the history
…w-reader-try-new
  • Loading branch information
tustvold committed Jun 3, 2022
2 parents 6a55ce7 + eb706b7 commit cb37ee4
Show file tree
Hide file tree
Showing 17 changed files with 175 additions and 55 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
path: rust
fetch-depth: 0
- name: Setup Python
uses: actions/setup-python@v2
uses: actions/setup-python@v3
with:
python-version: 3.8
- name: Setup Archery
Expand All @@ -64,17 +64,17 @@ jobs:
rustup default ${{ matrix.rust }}
rustup component add rustfmt clippy
- name: Cache Cargo
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /home/runner/.cargo
key: cargo-maturin-cache-
- name: Cache Rust dependencies
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /home/runner/target
# this key is not equal because maturin uses different compilation flags.
key: ${{ runner.os }}-${{ matrix.arch }}-target-maturin-cache-${{ matrix.rust }}-
- uses: actions/setup-python@v2
- uses: actions/setup-python@v3
with:
python-version: '3.7'
- name: Upgrade pip and setuptools
Expand Down
36 changes: 18 additions & 18 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ jobs:
steps:
- uses: actions/checkout@v2
- name: Cache Cargo
uses: actions/cache@v2
uses: actions/cache@v3
with:
# these represent dependencies downloaded by cargo
# and thus do not depend on the OS, arch nor rust version.
path: /github/home/.cargo
key: cargo-cache3-
- name: Cache Rust dependencies
uses: actions/cache@v2
uses: actions/cache@v3
with:
# these represent compiled steps of both dependencies and arrow
# and thus are specific for a particular OS, arch and rust version.
Expand Down Expand Up @@ -86,13 +86,13 @@ jobs:
with:
submodules: true
- name: Cache Cargo
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /github/home/.cargo
# this key equals the ones on `linux-build-lib` for re-use
key: cargo-cache3-
- name: Cache Rust dependencies
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /github/home/target
# this key equals the ones on `linux-build-lib` for re-use
Expand Down Expand Up @@ -154,12 +154,12 @@ jobs:
with:
submodules: true
- name: Cache Cargo
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /github/home/.cargo
key: cargo-nightly-cache3-
- name: Cache Rust dependencies
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /github/home/target
key: ${{ runner.os }}-${{ matrix.arch }}-target-nightly-cache3-${{ matrix.rust }}
Expand Down Expand Up @@ -226,13 +226,13 @@ jobs:
with:
submodules: true
- name: Cache Cargo
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /github/home/.cargo
# this key equals the ones on `linux-build-lib` for re-use
key: cargo-cache3-
- name: Cache Rust dependencies
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /github/home/target
# this key equals the ones on `linux-build-lib` for re-use
Expand Down Expand Up @@ -268,13 +268,13 @@ jobs:
with:
submodules: true
- name: Cache Cargo
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /github/home/.cargo
# this key equals the ones on `linux-build-lib` for re-use
key: cargo-cache3-
- name: Cache Rust dependencies
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /github/home/target
# this key equals the ones on `linux-build-lib` for re-use
Expand Down Expand Up @@ -321,13 +321,13 @@ jobs:
rustup default ${{ matrix.rust }}
rustup component add rustfmt clippy
- name: Cache Cargo
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /home/runner/.cargo
# this key is not equal because the user is different than on a container (runner vs github)
key: cargo-coverage-cache3-
- name: Cache Rust dependencies
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /home/runner/target
# this key is not equal because coverage uses different compilation flags.
Expand Down Expand Up @@ -369,12 +369,12 @@ jobs:
with:
submodules: true
- name: Cache Cargo
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /github/home/.cargo
key: cargo-wasm32-cache3-
- name: Cache Rust dependencies
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /github/home/target
key: ${{ runner.os }}-${{ matrix.arch }}-target-wasm32-cache3-${{ matrix.rust }}
Expand Down Expand Up @@ -416,12 +416,12 @@ jobs:
apt update
apt install -y libpython3.9-dev
- name: Cache Cargo
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /github/home/.cargo
key: cargo-nightly-cache3-
- name: Cache Rust dependencies
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /github/home/target
key: ${{ runner.os }}-${{ matrix.arch }}-target-nightly-cache3-${{ matrix.rust }}
Expand Down Expand Up @@ -453,13 +453,13 @@ jobs:
steps:
- uses: actions/checkout@v2
- name: Cache Cargo
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /github/home/.cargo
# this key equals the ones on `linux-build-lib` for re-use
key: cargo-cache3-
- name: Cache Rust dependencies
uses: actions/cache@v2
uses: actions/cache@v3
with:
path: /github/home/target
# this key equals the ones on `linux-build-lib` for re-use
Expand Down
28 changes: 19 additions & 9 deletions arrow/src/array/array_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1480,7 +1480,7 @@ mod tests {
192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253,
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
];
let array_data = ArrayData::builder(DataType::Decimal(23, 6))
let array_data = ArrayData::builder(DataType::Decimal(38, 6))
.len(2)
.add_buffer(Buffer::from(&values[..]))
.build()
Expand All @@ -1492,6 +1492,7 @@ mod tests {
}

#[test]
#[cfg(not(feature = "force_validate"))]
fn test_decimal_append_error_value() {
let mut decimal_builder = DecimalBuilder::new(10, 5, 3);
let mut result = decimal_builder.append_value(123456);
Expand All @@ -1500,9 +1501,15 @@ mod tests {
"Invalid argument error: 123456 is too large to store in a Decimal of precision 5. Max is 99999",
error.to_string()
);

unsafe {
decimal_builder.disable_value_validation();
}
result = decimal_builder.append_value(123456);
assert!(result.is_ok());
decimal_builder.append_value(12345).unwrap();
let arr = decimal_builder.finish();
assert_eq!("12.345", arr.value_as_string(0));
assert_eq!("12.345", arr.value_as_string(1));

decimal_builder = DecimalBuilder::new(10, 2, 1);
result = decimal_builder.append_value(100);
Expand All @@ -1511,18 +1518,21 @@ mod tests {
"Invalid argument error: 100 is too large to store in a Decimal of precision 2. Max is 99",
error.to_string()
);

unsafe {
decimal_builder.disable_value_validation();
}
result = decimal_builder.append_value(100);
assert!(result.is_ok());
decimal_builder.append_value(99).unwrap();
result = decimal_builder.append_value(-100);
error = result.unwrap_err();
assert_eq!(
"Invalid argument error: -100 is too small to store in a Decimal of precision 2. Min is -99",
error.to_string()
);
assert!(result.is_ok());
decimal_builder.append_value(-99).unwrap();
let arr = decimal_builder.finish();
assert_eq!("9.9", arr.value_as_string(0));
assert_eq!("-9.9", arr.value_as_string(1));
assert_eq!("9.9", arr.value_as_string(1));
assert_eq!("-9.9", arr.value_as_string(3));
}

#[test]
fn test_decimal_from_iter_values() {
let array = DecimalArray::from_iter_values(vec![-100, 0, 101].into_iter());
Expand Down
28 changes: 24 additions & 4 deletions arrow/src/array/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,10 @@ pub struct DecimalBuilder {
builder: FixedSizeListBuilder<UInt8Builder>,
precision: usize,
scale: usize,

/// Should i128 values be validated for compatibility with scale and precision?
/// defaults to true
value_validation: bool,
}

impl<OffsetSize: OffsetSizeTrait> ArrayBuilder for GenericBinaryBuilder<OffsetSize> {
Expand Down Expand Up @@ -1455,16 +1459,32 @@ impl DecimalBuilder {
builder: FixedSizeListBuilder::new(values_builder, byte_width),
precision,
scale,
value_validation: true,
}
}

/// Disable validation
///
/// # Safety
///
/// After disabling validation, caller must ensure that appended values are compatible
/// for the specified precision and scale.
pub unsafe fn disable_value_validation(&mut self) {
self.value_validation = false;
}

/// Appends a byte slice into the builder.
///
/// Automatically calls the `append` method to delimit the slice appended in as a
/// distinct array element.
#[inline]
pub fn append_value(&mut self, value: i128) -> Result<()> {
let value = validate_decimal_precision(value, self.precision)?;
let value = if self.value_validation {
validate_decimal_precision(value, self.precision)?
} else {
value
};

let value_as_bytes = Self::from_i128_to_fixed_size_bytes(
value,
self.builder.value_length() as usize,
Expand All @@ -1480,7 +1500,7 @@ impl DecimalBuilder {
self.builder.append(true)
}

fn from_i128_to_fixed_size_bytes(v: i128, size: usize) -> Result<Vec<u8>> {
pub(crate) fn from_i128_to_fixed_size_bytes(v: i128, size: usize) -> Result<Vec<u8>> {
if size > 16 {
return Err(ArrowError::InvalidArgumentError(
"DecimalBuilder only supports values up to 16 bytes.".to_string(),
Expand Down Expand Up @@ -3420,14 +3440,14 @@ mod tests {

#[test]
fn test_decimal_builder() {
let mut builder = DecimalBuilder::new(30, 23, 6);
let mut builder = DecimalBuilder::new(30, 38, 6);

builder.append_value(8_887_000_000).unwrap();
builder.append_null().unwrap();
builder.append_value(-8_887_000_000).unwrap();
let decimal_array: DecimalArray = builder.finish();

assert_eq!(&DataType::Decimal(23, 6), decimal_array.data_type());
assert_eq!(&DataType::Decimal(38, 6), decimal_array.data_type());
assert_eq!(3, decimal_array.len());
assert_eq!(1, decimal_array.null_count());
assert_eq!(32, decimal_array.value_offset(2));
Expand Down
54 changes: 51 additions & 3 deletions arrow/src/array/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
//! Contains `ArrayData`, a generic representation of Arrow array data which encapsulates
//! common attributes and operations for Arrow array.
use crate::datatypes::{DataType, IntervalUnit, UnionMode};
use crate::datatypes::{validate_decimal_precision, DataType, IntervalUnit, UnionMode};
use crate::error::{ArrowError, Result};
use crate::{bitmap::Bitmap, datatypes::ArrowNativeType};
use crate::{
Expand Down Expand Up @@ -999,6 +999,21 @@ impl ArrayData {

pub fn validate_dictionary_offset(&self) -> Result<()> {
match &self.data_type {
DataType::Decimal(p, _) => {
let values_buffer = &self.buffers[0];

for pos in 0..values_buffer.len() {
let raw_val = unsafe {
std::slice::from_raw_parts(
values_buffer.as_ptr().add(pos),
16_usize,
)
};
let value = i128::from_le_bytes(raw_val.try_into().unwrap());
validate_decimal_precision(value, *p)?;
}
Ok(())
}
DataType::Utf8 => self.validate_utf8::<i32>(),
DataType::LargeUtf8 => self.validate_utf8::<i64>(),
DataType::Binary => self.validate_offsets_full::<i32>(self.buffers[1].len()),
Expand Down Expand Up @@ -1492,8 +1507,9 @@ mod tests {
use std::ptr::NonNull;

use crate::array::{
make_array, Array, BooleanBuilder, Int32Array, Int32Builder, Int64Array,
StringArray, StructBuilder, UInt64Array,
make_array, Array, BooleanBuilder, DecimalBuilder, FixedSizeListBuilder,
Int32Array, Int32Builder, Int64Array, StringArray, StructBuilder, UInt64Array,
UInt8Builder,
};
use crate::buffer::Buffer;
use crate::datatypes::Field;
Expand Down Expand Up @@ -2707,4 +2723,36 @@ mod tests {

assert_eq!(array, &expected);
}

#[test]
#[cfg(not(feature = "force_validate"))]
fn test_decimal_full_validation() {
let values_builder = UInt8Builder::new(10);
let byte_width = 16;
let mut fixed_size_builder =
FixedSizeListBuilder::new(values_builder, byte_width);
let value_as_bytes = DecimalBuilder::from_i128_to_fixed_size_bytes(
123456,
fixed_size_builder.value_length() as usize,
)
.unwrap();
fixed_size_builder
.values()
.append_slice(value_as_bytes.as_slice())
.unwrap();
fixed_size_builder.append(true).unwrap();
let fixed_size_array = fixed_size_builder.finish();

// Build ArrayData for Decimal
let builder = ArrayData::builder(DataType::Decimal(5, 3))
.len(fixed_size_array.len())
.add_buffer(fixed_size_array.data_ref().child_data()[0].buffers()[0].clone());
let array_data = unsafe { builder.build_unchecked() };
let validation_result = array_data.validate_full();
let error = validation_result.unwrap_err();
assert_eq!(
"Invalid argument error: 123456 is too large to store in a Decimal of precision 5. Max is 99999",
error.to_string()
);
}
}
Loading

0 comments on commit cb37ee4

Please sign in to comment.