Skip to content

Commit

Permalink
Fix undefined behavior in FFI (#323)
Browse files Browse the repository at this point in the history
- Fix UB due to aliasing
- Enable MIRI in CI for most tests in arrow crate

Signed-off-by: roee88 <roee88@gmail.com>
  • Loading branch information
roee88 authored May 23, 2021
1 parent a25cafb commit f316798
Show file tree
Hide file tree
Showing 8 changed files with 29 additions and 17 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,9 @@ jobs:
export MIRIFLAGS="-Zmiri-disable-isolation"
cargo miri setup
cargo clean
# Ignore MIRI errors until we can get a clean run
cargo miri test || true
# Currently only the arrow crate is tested with miri
# IO related tests and some unsupported tests are skipped
cargo miri test -p arrow -- --skip csv --skip ipc --skip json
lint:
name: Lint
Expand Down
1 change: 1 addition & 0 deletions arrow/src/array/raw_pointer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ mod tests {

#[test]
#[should_panic(expected = "memory is not aligned")]
#[cfg_attr(miri, ignore)] // sometimes does not panic as expected
fn test_primitive_array_alignment() {
let bytes = vec![0u8, 1u8];
unsafe { RawPtrBox::<u64>::new(bytes.as_ptr().offset(1)) };
Expand Down
1 change: 1 addition & 0 deletions arrow/src/compute/kernels/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3521,6 +3521,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // running forever
fn test_can_cast_types() {
// this function attempts to ensure that can_cast_types stays
// in sync with cast. It simply tries all combinations of
Expand Down
1 change: 1 addition & 0 deletions arrow/src/compute/kernels/cast_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // unsupported operation: can't call foreign function: mktime
fn string_to_timestamp_no_timezone() -> Result<()> {
// This test is designed to succeed in regardless of the local
// timezone the test machine is running. Thus it is still
Expand Down
2 changes: 2 additions & 0 deletions arrow/src/compute/kernels/length.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // running forever
fn length_test_string() -> Result<()> {
length_cases()
.into_iter()
Expand All @@ -170,6 +171,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // running forever
fn length_test_large_string() -> Result<()> {
length_cases()
.into_iter()
Expand Down
31 changes: 16 additions & 15 deletions arrow/src/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,27 +153,30 @@ impl FFI_ArrowSchema {
_ => vec![],
};
// note: this cannot be done along with the above because the above is fallible and this op leaks.
let mut children_ptr = children_vec
let children_ptr = children_vec
.into_iter()
.map(Box::into_raw)
.collect::<Box<_>>();
let n_children = children_ptr.len() as i64;
let children = children_ptr.as_mut_ptr();

let flags = field.is_nullable() as i64 * 2;

let mut private = Box::new(SchemaPrivateData {
field,
children_ptr,
});

// <https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema>
Ok(FFI_ArrowSchema {
format: CString::new(format).unwrap().into_raw(),
name: CString::new(name).unwrap().into_raw(),
metadata: std::ptr::null_mut(),
flags: field.is_nullable() as i64 * 2,
flags,
n_children,
children,
children: private.children_ptr.as_mut_ptr(),
dictionary: std::ptr::null_mut(),
release: Some(release_schema),
private_data: Box::into_raw(Box::new(SchemaPrivateData {
field,
children_ptr,
})) as *mut ::std::os::raw::c_void,
private_data: Box::into_raw(private) as *mut ::std::os::raw::c_void,
})
}

Expand Down Expand Up @@ -448,27 +451,25 @@ impl FFI_ArrowArray {
.collect::<Vec<_>>();
let n_buffers = buffers.len() as i64;

let mut buffers_ptr = buffers
let buffers_ptr = buffers
.iter()
.map(|maybe_buffer| match maybe_buffer {
// note that `raw_data` takes into account the buffer's offset
Some(b) => b.as_ptr() as *const std::os::raw::c_void,
None => std::ptr::null(),
})
.collect::<Box<[_]>>();
let pointer = buffers_ptr.as_mut_ptr();

let mut children = data
let children = data
.child_data()
.iter()
.map(|child| Box::into_raw(Box::new(FFI_ArrowArray::new(child))))
.collect::<Box<_>>();
let children_ptr = children.as_mut_ptr();
let n_children = children.len() as i64;

// create the private data owning everything.
// any other data must be added here, e.g. via a struct, to track lifetime.
let private_data = Box::new(PrivateData {
let mut private_data = Box::new(PrivateData {
buffers,
buffers_ptr,
children,
Expand All @@ -480,8 +481,8 @@ impl FFI_ArrowArray {
offset: data.offset() as i64,
n_buffers,
n_children,
buffers: pointer,
children: children_ptr,
buffers: private_data.buffers_ptr.as_mut_ptr(),
children: private_data.children.as_mut_ptr(),
dictionary: std::ptr::null_mut(),
release: Some(release_array),
private_data: Box::into_raw(private_data) as *mut ::std::os::raw::c_void,
Expand Down
4 changes: 4 additions & 0 deletions arrow/src/util/bit_chunk_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
fn test_iter_unaligned() {
let input: &[u8] = &[
0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000,
Expand All @@ -205,6 +206,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
fn test_iter_unaligned_remainder_1_byte() {
let input: &[u8] = &[
0b00000000, 0b00000001, 0b00000010, 0b00000100, 0b00001000, 0b00010000,
Expand All @@ -226,6 +228,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
fn test_iter_unaligned_remainder_bits_across_bytes() {
let input: &[u8] = &[0b00111111, 0b11111100];
let buffer: Buffer = Buffer::from(input);
Expand All @@ -239,6 +242,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
fn test_iter_unaligned_remainder_bits_large() {
let input: &[u8] = &[
0b11111111, 0b00000000, 0b11111111, 0b00000000, 0b11111111, 0b00000000,
Expand Down
1 change: 1 addition & 0 deletions arrow/src/util/integration_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -722,6 +722,7 @@ mod tests {
}

#[test]
#[cfg_attr(miri, ignore)] // running forever
fn test_arrow_data_equality() {
let secs_tz = Some("Europe/Budapest".to_string());
let millis_tz = Some("America/New_York".to_string());
Expand Down

0 comments on commit f316798

Please sign in to comment.