Skip to content

Commit

Permalink
run clippy to lint arrow crate in CI
Browse files Browse the repository at this point in the history
  • Loading branch information
houqp committed Jun 20, 2020
1 parent 25a9567 commit a70d490
Show file tree
Hide file tree
Showing 16 changed files with 200 additions and 94 deletions.
21 changes: 21 additions & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,27 @@ env:
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}

jobs:
lint:
name: Lint with clippy
strategy:
matrix:
rust: [nightly-2020-04-22]
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
runs-on: ubuntu-latest
steps:
- name: Checkout Arrow
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Install Rust
uses: actions-rs/toolchain@v1
with:
toolchain: ${{ matrix.rust }}
override: true
components: clippy
- name: Run Clippy
shell: bash
run: ci/scripts/rust_lint.sh

debian:
name: AMD64 Debian 10 Rust ${{ matrix.rust }}
Expand Down
39 changes: 39 additions & 0 deletions ci/scripts/rust_lint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

set -e

pushd rust/arrow
max_error_cnt=3
max_warn_cnt=9

error_msg=$(cargo clippy -- -A clippy::redundant_field_names 2>&1 | grep 'error: aborting due to')

error_cnt=$(echo "${error_msg}" | awk '{print $5}')
[[ ${error_cnt} -gt ${max_error_cnt} ]] && {
echo "More clippy errors introduced, got: ${error_cnt}, was: ${max_error_cnt}"
exit 1
}

warn_cnt=$(echo "${error_msg}" | awk '{print $8}')
[[ ${warn_cnt} -gt ${max_warn_cnt} ]] && {
echo "More clippy warnings introduced, got: ${warn_cnt}, was: ${max_warn_cnt}"
exit 1
}
popd
4 changes: 2 additions & 2 deletions rust/arrow/src/array/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1143,7 +1143,7 @@ fn append_binary_data(
DataType::List(Box::new(DataType::UInt8)),
array.len(),
None,
array.null_buffer().map(|buf| buf.clone()),
array.null_buffer().cloned(),
array.offset(),
vec![(&array.buffers()[0]).clone()],
vec![int_data],
Expand Down Expand Up @@ -1195,7 +1195,7 @@ impl ArrayBuilder for FixedSizeBinaryBuilder {
DataType::FixedSizeList(Box::new(DataType::UInt8), self.builder.list_len),
array.len(),
None,
array.null_buffer().map(|buf| buf.clone()),
array.null_buffer().cloned(),
array.offset(),
vec![],
vec![int_data],
Expand Down
4 changes: 4 additions & 0 deletions rust/arrow/src/array/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ impl ArrayData {
self.len
}

pub fn is_empty(&self) -> bool {
self.len == 0
}

/// Returns the offset of this array
pub fn offset(&self) -> usize {
self.offset
Expand Down
1 change: 1 addition & 0 deletions rust/arrow/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
//! )
//! ```
#[allow(clippy::module_inception)]
mod array;
mod builder;
mod cast;
Expand Down
96 changes: 47 additions & 49 deletions rust/arrow/src/array/ord.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,63 +69,61 @@ impl OrdArray for NullArray {
}

/// Convert ArrayRef to OrdArray trait object
pub fn as_ordarray(values: &ArrayRef) -> Result<Box<&OrdArray>> {
pub fn as_ordarray(values: &ArrayRef) -> Result<&OrdArray> {
match values.data_type() {
DataType::Boolean => Ok(Box::new(as_boolean_array(&values))),
DataType::Utf8 => Ok(Box::new(as_string_array(&values))),
DataType::Null => Ok(Box::new(as_null_array(&values))),
DataType::Int8 => Ok(Box::new(as_primitive_array::<Int8Type>(&values))),
DataType::Int16 => Ok(Box::new(as_primitive_array::<Int16Type>(&values))),
DataType::Int32 => Ok(Box::new(as_primitive_array::<Int32Type>(&values))),
DataType::Int64 => Ok(Box::new(as_primitive_array::<Int64Type>(&values))),
DataType::UInt8 => Ok(Box::new(as_primitive_array::<UInt8Type>(&values))),
DataType::UInt16 => Ok(Box::new(as_primitive_array::<UInt16Type>(&values))),
DataType::UInt32 => Ok(Box::new(as_primitive_array::<UInt32Type>(&values))),
DataType::UInt64 => Ok(Box::new(as_primitive_array::<UInt64Type>(&values))),
DataType::Date32(_) => Ok(Box::new(as_primitive_array::<Date32Type>(&values))),
DataType::Date64(_) => Ok(Box::new(as_primitive_array::<Date64Type>(&values))),
DataType::Time32(Second) => {
Ok(Box::new(as_primitive_array::<Time32SecondType>(&values)))
DataType::Boolean => Ok(as_boolean_array(&values)),
DataType::Utf8 => Ok(as_string_array(&values)),
DataType::Null => Ok(as_null_array(&values)),
DataType::Int8 => Ok(as_primitive_array::<Int8Type>(&values)),
DataType::Int16 => Ok(as_primitive_array::<Int16Type>(&values)),
DataType::Int32 => Ok(as_primitive_array::<Int32Type>(&values)),
DataType::Int64 => Ok(as_primitive_array::<Int64Type>(&values)),
DataType::UInt8 => Ok(as_primitive_array::<UInt8Type>(&values)),
DataType::UInt16 => Ok(as_primitive_array::<UInt16Type>(&values)),
DataType::UInt32 => Ok(as_primitive_array::<UInt32Type>(&values)),
DataType::UInt64 => Ok(as_primitive_array::<UInt64Type>(&values)),
DataType::Date32(_) => Ok(as_primitive_array::<Date32Type>(&values)),
DataType::Date64(_) => Ok(as_primitive_array::<Date64Type>(&values)),
DataType::Time32(Second) => Ok(as_primitive_array::<Time32SecondType>(&values)),
DataType::Time32(Millisecond) => {
Ok(as_primitive_array::<Time32MillisecondType>(&values))
}
DataType::Time64(Microsecond) => {
Ok(as_primitive_array::<Time64MicrosecondType>(&values))
}
DataType::Time64(Nanosecond) => {
Ok(as_primitive_array::<Time64NanosecondType>(&values))
}
DataType::Time32(Millisecond) => Ok(Box::new(as_primitive_array::<
Time32MillisecondType,
>(&values))),
DataType::Time64(Microsecond) => Ok(Box::new(as_primitive_array::<
Time64MicrosecondType,
>(&values))),
DataType::Time64(Nanosecond) => Ok(Box::new(as_primitive_array::<
Time64NanosecondType,
>(&values))),
DataType::Timestamp(Second, _) => {
Ok(Box::new(as_primitive_array::<TimestampSecondType>(&values)))
Ok(as_primitive_array::<TimestampSecondType>(&values))
}
DataType::Timestamp(Millisecond, _) => {
Ok(as_primitive_array::<TimestampMillisecondType>(&values))
}
DataType::Timestamp(Microsecond, _) => {
Ok(as_primitive_array::<TimestampMicrosecondType>(&values))
}
DataType::Timestamp(Nanosecond, _) => {
Ok(as_primitive_array::<TimestampNanosecondType>(&values))
}
DataType::Interval(IntervalUnit::YearMonth) => {
Ok(as_primitive_array::<IntervalYearMonthType>(&values))
}
DataType::Timestamp(Millisecond, _) => Ok(Box::new(as_primitive_array::<
TimestampMillisecondType,
>(&values))),
DataType::Timestamp(Microsecond, _) => Ok(Box::new(as_primitive_array::<
TimestampMicrosecondType,
>(&values))),
DataType::Timestamp(Nanosecond, _) => Ok(Box::new(as_primitive_array::<
TimestampNanosecondType,
>(&values))),
DataType::Interval(IntervalUnit::YearMonth) => Ok(Box::new(
as_primitive_array::<IntervalYearMonthType>(&values),
)),
DataType::Interval(IntervalUnit::DayTime) => {
Ok(Box::new(as_primitive_array::<IntervalDayTimeType>(&values)))
Ok(as_primitive_array::<IntervalDayTimeType>(&values))
}
DataType::Duration(TimeUnit::Second) => {
Ok(Box::new(as_primitive_array::<DurationSecondType>(&values)))
Ok(as_primitive_array::<DurationSecondType>(&values))
}
DataType::Duration(TimeUnit::Millisecond) => {
Ok(as_primitive_array::<DurationMillisecondType>(&values))
}
DataType::Duration(TimeUnit::Microsecond) => {
Ok(as_primitive_array::<DurationMicrosecondType>(&values))
}
DataType::Duration(TimeUnit::Nanosecond) => {
Ok(as_primitive_array::<DurationNanosecondType>(&values))
}
DataType::Duration(TimeUnit::Millisecond) => Ok(Box::new(as_primitive_array::<
DurationMillisecondType,
>(&values))),
DataType::Duration(TimeUnit::Microsecond) => Ok(Box::new(as_primitive_array::<
DurationMicrosecondType,
>(&values))),
DataType::Duration(TimeUnit::Nanosecond) => Ok(Box::new(as_primitive_array::<
DurationNanosecondType,
>(&values))),
t => Err(ArrowError::ComputeError(format!(
"Lexical Sort not supported for data type {:?}",
t
Expand Down
43 changes: 20 additions & 23 deletions rust/arrow/src/array/union.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ impl UnionArray {
.iter()
.filter(|i| *i < &0)
.collect::<Vec<&i8>>();
if invalid_type_ids.len() > 0 {
if !invalid_type_ids.is_empty() {
return Err(ArrowError::InvalidArgumentError(format!(
"Type Ids must be positive and cannot be greater than the number of \
child arrays, found:\n{:?}",
Expand All @@ -188,7 +188,7 @@ impl UnionArray {
.iter()
.filter(|i| *i < &0 || *i > &max_len)
.collect::<Vec<&i32>>();
if invalid_offsets.len() > 0 {
if !invalid_offsets.is_empty() {
return Err(ArrowError::InvalidArgumentError(format!(
"Offsets must be positive and within the length of the Array, \
found:\n{:?}",
Expand Down Expand Up @@ -325,7 +325,7 @@ impl fmt::Debug for UnionArray {
column.data_type()
)?;
fmt::Debug::fmt(column, f)?;
writeln!(f, "")?;
writeln!(f)?;
}
writeln!(f, "]")
}
Expand Down Expand Up @@ -484,7 +484,7 @@ impl UnionBuilder {

/// Appends a null to this builder.
pub fn append_null(&mut self) -> Result<()> {
if let None = self.bitmap_builder {
if self.bitmap_builder.is_none() {
let mut builder = BooleanBufferBuilder::new(self.len + 1);
for _ in 0..self.len {
builder.append(true)?;
Expand All @@ -499,7 +499,7 @@ impl UnionBuilder {
self.type_id_builder.append(i8::default())?;

// Handle sparse union
if let None = self.value_offset_builder {
if self.value_offset_builder.is_none() {
for (_, fd) in self.fields.iter_mut() {
fd.append_null_dynamic()?;
}
Expand All @@ -518,25 +518,22 @@ impl UnionBuilder {

let mut field_data = match self.fields.remove(&type_name) {
Some(data) => data,
None => {
let field_data = match self.value_offset_builder {
Some(_) => {
FieldData::new(self.fields.len() as i8, T::get_data_type(), None)
}
None => {
let mut fd = FieldData::new(
self.fields.len() as i8,
T::get_data_type(),
Some(BooleanBufferBuilder::new(1)),
);
for _ in 0..self.len {
fd.append_null::<T>()?;
}
fd
None => match self.value_offset_builder {
Some(_) => {
FieldData::new(self.fields.len() as i8, T::get_data_type(), None)
}
None => {
let mut fd = FieldData::new(
self.fields.len() as i8,
T::get_data_type(),
Some(BooleanBufferBuilder::new(1)),
);
for _ in 0..self.len {
fd.append_null::<T>()?;
}
};
field_data
}
fd
}
},
};
self.type_id_builder.append(field_data.type_id)?;

Expand Down
6 changes: 5 additions & 1 deletion rust/arrow/src/bitmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ impl Bitmap {
self.bits.len()
}

pub fn is_empty(&self) -> bool {
self.bits.is_empty()
}

pub fn is_set(&self, i: usize) -> bool {
assert!(i < (self.bits.len() << 3));
unsafe { bit_util::get_bit_raw(self.bits.raw_data(), i) }
Expand All @@ -60,7 +64,7 @@ impl Bitmap {
&self.bits
}

pub fn to_buffer(self) -> Buffer {
pub fn into_buffer(self) -> Buffer {
self.bits
}
}
Expand Down
4 changes: 2 additions & 2 deletions rust/arrow/src/compute/kernels/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ pub fn lexsort_to_indices(columns: &Vec<SortColumn>) -> Result<UInt32Array> {
// convert ArrayRefs to OrdArray trait objects and perform row count check
let flat_columns = columns
.iter()
.map(|column| -> Result<(Box<&OrdArray>, SortOptions)> {
.map(|column| -> Result<(&OrdArray, SortOptions)> {
// row count check
let curr_row_count = column.values.len() - column.values.offset();
match row_count {
Expand All @@ -292,7 +292,7 @@ pub fn lexsort_to_indices(columns: &Vec<SortColumn>) -> Result<UInt32Array> {
column.options.unwrap_or_default(),
))
})
.collect::<Result<Vec<(Box<&OrdArray>, SortOptions)>>>()?;
.collect::<Result<Vec<(&OrdArray, SortOptions)>>>()?;

let lex_comparator = |a_idx: &usize, b_idx: &usize| -> Ordering {
for column in flat_columns.iter() {
Expand Down
12 changes: 5 additions & 7 deletions rust/arrow/src/csv/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ fn infer_file_schema<R: Read + Seek>(
///
/// If `max_read_records` is not set, all files will be read fully to infer the schema.
pub fn infer_schema_from_files(
files: &Vec<String>,
files: &[String],
delimiter: u8,
max_read_records: Option<usize>,
has_header: bool,
Expand All @@ -207,7 +207,7 @@ pub fn infer_schema_from_files(
}
schemas.push(schema.clone());
records_to_read -= records_read;
if records_to_read <= 0 {
if records_to_read == 0 {
break;
}
}
Expand Down Expand Up @@ -283,11 +283,8 @@ impl<R: Read> Reader<R> {
let mut reader_builder = csv_crate::ReaderBuilder::new();
reader_builder.has_headers(has_header);

match delimiter {
Some(c) => {
reader_builder.delimiter(c);
}
_ => (),
if let Some(c) = delimiter {
reader_builder.delimiter(c);
}

let csv_reader = reader_builder.from_reader(buf_reader);
Expand All @@ -302,6 +299,7 @@ impl<R: Read> Reader<R> {
}

/// Read the next batch of rows
#[allow(clippy::should_implement_trait)]
pub fn next(&mut self) -> Result<Option<RecordBatch>> {
// read a batch of rows into memory
let mut rows: Vec<StringRecord> = Vec::with_capacity(self.batch_size);
Expand Down
Loading

0 comments on commit a70d490

Please sign in to comment.