tests: make it tractable to run Miri
We make an absolute mess of our tests so that 'cargo miri test' will
complete in reasonable time. I hate this, but Miri is worth it.

Ref #121
BurntSushi committed Sep 2, 2022
1 parent 50086e7 commit 0ee9b5e
Showing 8 changed files with 46 additions and 15 deletions.
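
Every change below follows the same idea: use the `miri` cfg (which `cargo miri test` sets automatically, so no feature flags or build scripts are needed) to route runtime dispatch to the portable fallback, compile out the slowest tests, and shrink loop bounds in the tests that remain. As a rough, self-contained sketch of the dispatch shape used in src/ascii.rs (the function bodies here are simplified placeholders, not the real implementations):

pub fn first_non_ascii_byte(slice: &[u8]) -> usize {
    // Under Miri, or on non-x86_64 targets, take the portable path.
    #[cfg(any(miri, not(target_arch = "x86_64")))]
    {
        first_non_ascii_byte_fallback(slice)
    }
    // The SSE2 path is only compiled (and run) on x86_64 outside of Miri.
    #[cfg(all(not(miri), target_arch = "x86_64"))]
    {
        first_non_ascii_byte_sse2(slice)
    }
}

// Tests call the fallback directly, which is why it also carries the `test` cfg.
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
    // Placeholder: the real fallback scans a usize at a time.
    slice.iter().position(|&b| b > 0x7F).unwrap_or(slice.len())
}

#[cfg(all(not(miri), target_arch = "x86_64"))]
fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize {
    // Placeholder: the real version uses SSE2 intrinsics.
    slice.iter().position(|&b| b > 0x7F).unwrap_or(slice.len())
}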
23 changes: 13 additions & 10 deletions src/ascii.rs
@@ -23,37 +23,37 @@ use core::mem;
// means we can effectively skip the _mm_cmpeq_epi8 step and jump straight to
// _mm_movemask_epi8.

#[cfg(any(test, not(target_arch = "x86_64")))]
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
const USIZE_BYTES: usize = mem::size_of::<usize>();
#[cfg(any(test, not(target_arch = "x86_64")))]
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
const FALLBACK_LOOP_SIZE: usize = 2 * USIZE_BYTES;

// This is a mask where the most significant bit of each byte in the usize
// is set. We test this bit to determine whether a character is ASCII or not.
// Namely, a single byte is regarded as an ASCII codepoint if and only if its
// most significant bit is not set.
#[cfg(any(test, not(target_arch = "x86_64")))]
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
const ASCII_MASK_U64: u64 = 0x8080808080808080;
#[cfg(any(test, not(target_arch = "x86_64")))]
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
const ASCII_MASK: usize = ASCII_MASK_U64 as usize;

/// Returns the index of the first non-ASCII byte in the given slice.
///
/// If the slice only contains ASCII bytes, then the length of the slice is
/// returned.
pub fn first_non_ascii_byte(slice: &[u8]) -> usize {
#[cfg(not(target_arch = "x86_64"))]
#[cfg(any(miri, not(target_arch = "x86_64")))]
{
first_non_ascii_byte_fallback(slice)
}

#[cfg(target_arch = "x86_64")]
#[cfg(all(not(miri), target_arch = "x86_64"))]
{
first_non_ascii_byte_sse2(slice)
}
}

#[cfg(any(test, not(target_arch = "x86_64")))]
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
let align = USIZE_BYTES - 1;
let start_ptr = slice.as_ptr();
@@ -115,7 +115,7 @@ fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
}
}

#[cfg(target_arch = "x86_64")]
#[cfg(all(not(miri), target_arch = "x86_64"))]
fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize {
use core::arch::x86_64::*;

@@ -221,7 +221,7 @@ unsafe fn first_non_ascii_byte_slow(
/// bytes is not an ASCII byte.
///
/// The position returned is always in the inclusive range [0, 7].
#[cfg(any(test, not(target_arch = "x86_64")))]
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
fn first_non_ascii_byte_mask(mask: usize) -> usize {
#[cfg(target_endian = "little")]
{
@@ -245,7 +245,7 @@ unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 {
ptr.offset((amt as isize).wrapping_neg())
}

#[cfg(any(test, not(target_arch = "x86_64")))]
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
use core::ptr;

@@ -286,6 +286,7 @@ mod tests {

#[test]
#[cfg(target_arch = "x86_64")]
#[cfg(not(miri))]
fn positive_sse2_forward() {
for i in 0..517 {
let b = "a".repeat(i).into_bytes();
@@ -294,6 +295,7 @@ }
}

#[test]
#[cfg(not(miri))]
fn negative_fallback_forward() {
for i in 0..517 {
for align in 0..65 {
@@ -315,6 +317,7 @@

#[test]
#[cfg(target_arch = "x86_64")]
#[cfg(not(miri))]
fn negative_sse2_forward() {
for i in 0..517 {
for align in 0..65 {
2 changes: 1 addition & 1 deletion src/byteset/mod.rs
@@ -80,7 +80,7 @@ pub(crate) fn rfind_not(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
}
}

#[cfg(all(test, feature = "std"))]
#[cfg(all(test, feature = "std", not(miri)))]
mod tests {
quickcheck::quickcheck! {
fn qc_byteset_forward_matches_naive(
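
quickcheck properties run a large number of randomly generated cases per test, which is impractical under Miri's interpreter, so the whole property-test module is compiled out rather than slowed down. A minimal sketch of the same shape (the property here is illustrative; the real module compares the byteset searchers against naive implementations):

#[cfg(all(test, feature = "std", not(miri)))]
mod tests {
    quickcheck::quickcheck! {
        // Illustrative property only; the real tests check the byteset
        // searchers against a naive scan over the haystack.
        fn qc_first_match_is_a_match(haystack: Vec<u8>, needle: u8) -> bool {
            match haystack.iter().position(|&b| b == needle) {
                None => !haystack.contains(&needle),
                Some(i) => haystack[i] == needle,
            }
        }
    }
}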
16 changes: 13 additions & 3 deletions src/byteset/scalar.rs
@@ -192,10 +192,15 @@ mod tests {
type TestCase = (Vec<u8>, u8, Option<(usize, usize)>);

fn build_tests() -> Vec<TestCase> {
#[cfg(not(miri))]
const MAX_PER: usize = 515;
#[cfg(miri)]
const MAX_PER: usize = 10;

let mut result = vec![];
for &(search, byte, fwd_pos, rev_pos) in TESTS {
result.push((search.to_vec(), byte, Some((fwd_pos, rev_pos))));
for i in 1..515 {
for i in 1..MAX_PER {
// add a bunch of copies of the search byte to the end.
let mut suffixed: Vec<u8> = search.into();
suffixed.extend(std::iter::repeat(byte).take(i));
@@ -225,7 +230,7 @@ }
}

// build non-matching tests for several sizes
for i in 0..515 {
for i in 0..MAX_PER {
result.push((
std::iter::repeat(b'\0').take(i).collect(),
b'\0',
@@ -240,6 +245,11 @@
fn test_inv_memchr() {
use crate::{ByteSlice, B};

#[cfg(not(miri))]
const MAX_OFFSET: usize = 130;
#[cfg(miri)]
const MAX_OFFSET: usize = 13;

for (search, byte, matching) in build_tests() {
assert_eq!(
inv_memchr(byte, &search),
@@ -259,7 +269,7 @@
);
// Test a rather large number of offsets for potential alignment
// issues.
for offset in 1..130 {
for offset in 1..MAX_OFFSET {
if offset >= search.len() {
break;
}
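
Tests that are still valuable under Miri but iterate over hundreds of lengths or offsets keep running; only their bounds shrink. The pattern is a pair of cfg-selected constants, shown here as a standalone sketch (the constant name follows the diff; the loop body is an illustrative stand-in, not the real inv_memchr checks):

#[cfg(test)]
mod tests {
    // Full coverage under `cargo test`, a much smaller sweep under Miri.
    #[cfg(not(miri))]
    const MAX_PER: usize = 515;
    #[cfg(miri)]
    const MAX_PER: usize = 10;

    #[test]
    fn sweep_many_lengths() {
        for len in 0..MAX_PER {
            let haystack = vec![b'a'; len];
            // Stand-in assertion; the real tests exercise inv_memchr over
            // haystacks built much like this one.
            assert_eq!(haystack.len(), len);
        }
    }
}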
5 changes: 4 additions & 1 deletion src/impls.rs
@@ -874,7 +874,9 @@ mod bstring_serde {

#[cfg(all(test, feature = "std"))]
mod display {
use crate::{bstring::BString, ByteSlice};
#[cfg(not(miri))]
use crate::bstring::BString;
use crate::ByteSlice;

#[test]
fn clean() {
@@ -972,6 +974,7 @@ mod display {
);
}

#[cfg(not(miri))]
quickcheck::quickcheck! {
fn total_length(bstr: BString) -> bool {
let size = bstr.chars().count();
6 changes: 6 additions & 0 deletions src/unicode/grapheme.rs
@@ -263,13 +263,15 @@ fn adjust_rev_for_regional_indicator(mut bs: &[u8], i: usize) -> usize {

#[cfg(all(test, feature = "std"))]
mod tests {
#[cfg(not(miri))]
use ucd_parse::GraphemeClusterBreakTest;

use crate::{ext_slice::ByteSlice, tests::LOSSY_TESTS};

use super::*;

#[test]
#[cfg(not(miri))]
fn forward_ucd() {
for (i, test) in ucdtests().into_iter().enumerate() {
let given = test.grapheme_clusters.concat();
@@ -292,6 +294,7 @@ }
}

#[test]
#[cfg(not(miri))]
fn reverse_ucd() {
for (i, test) in ucdtests().into_iter().enumerate() {
let given = test.grapheme_clusters.concat();
@@ -333,15 +336,18 @@ }
}
}

#[cfg(not(miri))]
fn uniescape(s: &str) -> String {
s.chars().flat_map(|c| c.escape_unicode()).collect::<String>()
}

#[cfg(not(miri))]
fn uniescape_vec(strs: &[String]) -> Vec<String> {
strs.iter().map(|s| uniescape(s)).collect()
}

/// Return all of the UCD for grapheme breaks.
#[cfg(not(miri))]
fn ucdtests() -> Vec<GraphemeClusterBreakTest> {
const TESTDATA: &'static str =
include_str!("data/GraphemeBreakTest.txt");
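
For the Unicode segmentation modules, the UCD-driven tests parse and replay thousands of cases from the bundled test data, so they are disabled under Miri outright, and every helper and import used only by those tests carries the same cfg so the module still compiles without unused-item warnings. A self-contained sketch of that arrangement (the data-loading helper is a hypothetical stand-in for the ucd_parse-based ucdtests() in the real code):

#[cfg(test)]
mod tests {
    // Only the disabled tests need this helper, so it gets the same cfg;
    // otherwise Miri builds would warn about an unused function.
    #[cfg(not(miri))]
    fn load_break_cases() -> Vec<String> {
        // Hypothetical stand-in for parsing data/GraphemeBreakTest.txt.
        vec!["a\u{0308}".to_string(), "\u{1F1FA}\u{1F1F8}".to_string()]
    }

    #[test]
    #[cfg(not(miri))]
    fn forward_ucd() {
        for case in load_break_cases() {
            assert!(!case.is_empty());
        }
    }

    // Cheap tests stay enabled so Miri still exercises part of the module.
    #[test]
    fn smoke() {
        assert_eq!("a\u{0308}".chars().count(), 2);
    }
}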
4 changes: 4 additions & 0 deletions src/unicode/sentence.rs
@@ -159,11 +159,13 @@ fn decode_sentence(bs: &[u8]) -> (&str, usize) {

#[cfg(all(test, feature = "std"))]
mod tests {
#[cfg(not(miri))]
use ucd_parse::SentenceBreakTest;

use crate::ext_slice::ByteSlice;

#[test]
#[cfg(not(miri))]
fn forward_ucd() {
for (i, test) in ucdtests().into_iter().enumerate() {
let given = test.sentences.concat();
@@ -199,11 +201,13 @@
bytes.sentences().collect()
}

#[cfg(not(miri))]
fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> {
strs.iter().map(|s| s.as_ref().as_bytes()).collect()
}

/// Return all of the UCD for sentence breaks.
#[cfg(not(miri))]
fn ucdtests() -> Vec<SentenceBreakTest> {
const TESTDATA: &'static str =
include_str!("data/SentenceBreakTest.txt");
4 changes: 4 additions & 0 deletions src/unicode/word.rs
@@ -321,11 +321,13 @@ fn decode_word(bs: &[u8]) -> (&str, usize) {

#[cfg(all(test, feature = "std"))]
mod tests {
#[cfg(not(miri))]
use ucd_parse::WordBreakTest;

use crate::ext_slice::ByteSlice;

#[test]
#[cfg(not(miri))]
fn forward_ucd() {
for (i, test) in ucdtests().into_iter().enumerate() {
let given = test.words.concat();
@@ -395,11 +397,13 @@
bytes.words_with_breaks().collect()
}

#[cfg(not(miri))]
fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> {
strs.iter().map(|s| s.as_ref().as_bytes()).collect()
}

/// Return all of the UCD for word breaks.
#[cfg(not(miri))]
fn ucdtests() -> Vec<WordBreakTest> {
const TESTDATA: &'static str = include_str!("data/WordBreakTest.txt");

1 change: 1 addition & 0 deletions src/utf8.rs
@@ -869,6 +869,7 @@ mod tests {
}

#[test]
#[cfg(not(miri))]
fn validate_all_codepoints() {
for i in 0..(0x10FFFF + 1) {
let cp = match char::from_u32(i) {
