tests: make it tractable to run Miri
We make an absolute mess of our tests so that 'cargo miri test' will
complete in reasonable time. I hate this, but Miri is worth it.

Ref #121
BurntSushi committed Sep 2, 2022
1 parent 50086e7 commit 0ee9b5e
Showing 8 changed files with 46 additions and 15 deletions.
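
Every change below follows the same idea: use the `miri` cfg (which `cargo miri test` sets automatically, so no feature flags or build scripts are needed) to route runtime dispatch to the portable fallback, compile out the slowest tests, and shrink loop bounds in the tests that remain. As a rough, self-contained sketch of the dispatch shape used in src/ascii.rs (the function bodies here are simplified placeholders, not the real implementations):

pub fn first_non_ascii_byte(slice: &[u8]) -> usize {
    // Under Miri, or on non-x86_64 targets, take the portable path.
    #[cfg(any(miri, not(target_arch = "x86_64")))]
    {
        first_non_ascii_byte_fallback(slice)
    }
    // The SSE2 path is only compiled (and run) on x86_64 outside of Miri.
    #[cfg(all(not(miri), target_arch = "x86_64"))]
    {
        first_non_ascii_byte_sse2(slice)
    }
}

// Tests call the fallback directly, which is why it also carries the `test` cfg.
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
    // Placeholder: the real fallback scans a usize at a time.
    slice.iter().position(|&b| b > 0x7F).unwrap_or(slice.len())
}

#[cfg(all(not(miri), target_arch = "x86_64"))]
fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize {
    // Placeholder: the real version uses SSE2 intrinsics.
    slice.iter().position(|&b| b > 0x7F).unwrap_or(slice.len())
}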
23 changes: 13 additions & 10 deletions src/ascii.rs
@@ -23,37 +23,37 @@ use core::mem;
// means we can effectively skip the _mm_cmpeq_epi8 step and jump straight to
// _mm_movemask_epi8.

#[cfg(any(test, not(target_arch = "x86_64")))]
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
const USIZE_BYTES: usize = mem::size_of::<usize>();
#[cfg(any(test, not(target_arch = "x86_64")))]
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
const FALLBACK_LOOP_SIZE: usize = 2 * USIZE_BYTES;

// This is a mask where the most significant bit of each byte in the usize
// is set. We test this bit to determine whether a character is ASCII or not.
// Namely, a single byte is regarded as an ASCII codepoint if and only if its
// most significant bit is not set.
#[cfg(any(test, not(target_arch = "x86_64")))]
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
const ASCII_MASK_U64: u64 = 0x8080808080808080;
#[cfg(any(test, not(target_arch = "x86_64")))]
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
const ASCII_MASK: usize = ASCII_MASK_U64 as usize;

/// Returns the index of the first non-ASCII byte in the given slice.
///
/// If the slice only contains ASCII bytes, then the length of the slice is
/// returned.
pub fn first_non_ascii_byte(slice: &[u8]) -> usize {
#[cfg(not(target_arch = "x86_64"))]
#[cfg(any(miri, not(target_arch = "x86_64")))]
{
first_non_ascii_byte_fallback(slice)
}

#[cfg(target_arch = "x86_64")]
#[cfg(all(not(miri), target_arch = "x86_64"))]
{
first_non_ascii_byte_sse2(slice)
}
}

#[cfg(any(test, not(target_arch = "x86_64")))]
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
let align = USIZE_BYTES - 1;
let start_ptr = slice.as_ptr();
@@ -115,7 +115,7 @@ fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
}
}

#[cfg(target_arch = "x86_64")]
#[cfg(all(not(miri), target_arch = "x86_64"))]
fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize {
use core::arch::x86_64::*;

@@ -221,7 +221,7 @@ unsafe fn first_non_ascii_byte_slow(
/// bytes is not an ASCII byte.
///
/// The position returned is always in the inclusive range [0, 7].
#[cfg(any(test, not(target_arch = "x86_64")))]
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
fn first_non_ascii_byte_mask(mask: usize) -> usize {
#[cfg(target_endian = "little")]
{
@@ -245,7 +245,7 @@ unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 {
ptr.offset((amt as isize).wrapping_neg())
}

#[cfg(any(test, not(target_arch = "x86_64")))]
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
use core::ptr;

@@ -286,6 +286,7 @@ mod tests {

#[test]
#[cfg(target_arch = "x86_64")]
#[cfg(not(miri))]
fn positive_sse2_forward() {
for i in 0..517 {
let b = "a".repeat(i).into_bytes();
@@ -294,6 +295,7 @@ }
}

#[test]
#[cfg(not(miri))]
fn negative_fallback_forward() {
for i in 0..517 {
for align in 0..65 {
@@ -315,6 +317,7 @@

#[test]
#[cfg(target_arch = "x86_64")]
#[cfg(not(miri))]
fn negative_sse2_forward() {
for i in 0..517 {
for align in 0..65 {
2 changes: 1 addition & 1 deletion src/byteset/mod.rs
@@ -80,7 +80,7 @@ pub(crate) fn rfind_not(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
}
}

#[cfg(all(test, feature = "std"))]
#[cfg(all(test, feature = "std", not(miri)))]
mod tests {
quickcheck::quickcheck! {
fn qc_byteset_forward_matches_naive(
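
quickcheck properties run a large number of randomly generated cases per test, which is impractical under Miri's interpreter, so the whole property-test module is compiled out rather than slowed down. A minimal sketch of the same shape (the property here is illustrative; the real module compares the byteset searchers against naive implementations):

#[cfg(all(test, feature = "std", not(miri)))]
mod tests {
    quickcheck::quickcheck! {
        // Illustrative property only; the real tests check the byteset
        // searchers against a naive scan over the haystack.
        fn qc_first_match_is_a_match(haystack: Vec<u8>, needle: u8) -> bool {
            match haystack.iter().position(|&b| b == needle) {
                None => !haystack.contains(&needle),
                Some(i) => haystack[i] == needle,
            }
        }
    }
}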
16 changes: 13 additions & 3 deletions src/byteset/scalar.rs
@@ -192,10 +192,15 @@ mod tests {
type TestCase = (Vec<u8>, u8, Option<(usize, usize)>);

fn build_tests() -> Vec<TestCase> {
#[cfg(not(miri))]
const MAX_PER: usize = 515;
#[cfg(miri)]
const MAX_PER: usize = 10;

let mut result = vec![];
for &(search, byte, fwd_pos, rev_pos) in TESTS {
result.push((search.to_vec(), byte, Some((fwd_pos, rev_pos))));
for i in 1..515 {
for i in 1..MAX_PER {
// add a bunch of copies of the search byte to the end.
let mut suffixed: Vec<u8> = search.into();
suffixed.extend(std::iter::repeat(byte).take(i));
@@ -225,7 +230,7 @@ }
}

// build non-matching tests for several sizes
for i in 0..515 {
for i in 0..MAX_PER {
result.push((
std::iter::repeat(b'\0').take(i).collect(),
b'\0',
@@ -240,6 +245,11 @@
fn test_inv_memchr() {
use crate::{ByteSlice, B};

#[cfg(not(miri))]
const MAX_OFFSET: usize = 130;
#[cfg(miri)]
const MAX_OFFSET: usize = 13;

for (search, byte, matching) in build_tests() {
assert_eq!(
inv_memchr(byte, &search),
@@ -259,7 +269,7 @@
);
// Test a rather large number of offsets for potential alignment
// issues.
for offset in 1..130 {
for offset in 1..MAX_OFFSET {
if offset >= search.len() {
break;
}
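
Tests that are still valuable under Miri but iterate over hundreds of lengths or offsets keep running; only their bounds shrink. The pattern is a pair of cfg-selected constants, shown here as a standalone sketch (the constant name follows the diff; the loop body is an illustrative stand-in, not the real inv_memchr checks):

#[cfg(test)]
mod tests {
    // Full coverage under `cargo test`, a much smaller sweep under Miri.
    #[cfg(not(miri))]
    const MAX_PER: usize = 515;
    #[cfg(miri)]
    const MAX_PER: usize = 10;

    #[test]
    fn sweep_many_lengths() {
        for len in 0..MAX_PER {
            let haystack = vec![b'a'; len];
            // Stand-in assertion; the real tests exercise inv_memchr over
            // haystacks built much like this one.
            assert_eq!(haystack.len(), len);
        }
    }
}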
5 changes: 4 additions & 1 deletion src/impls.rs
@@ -874,7 +874,9 @@ mod bstring_serde {

#[cfg(all(test, feature = "std"))]
mod display {
use crate::{bstring::BString, ByteSlice};
#[cfg(not(miri))]
use crate::bstring::BString;
use crate::ByteSlice;

#[test]
fn clean() {
@@ -972,6 +974,7 @@ mod display {
);
}

#[cfg(not(miri))]
quickcheck::quickcheck! {
fn total_length(bstr: BString) -> bool {
let size = bstr.chars().count();
6 changes: 6 additions & 0 deletions src/unicode/grapheme.rs
@@ -263,13 +263,15 @@ fn adjust_rev_for_regional_indicator(mut bs: &[u8], i: usize) -> usize {

#[cfg(all(test, feature = "std"))]
mod tests {
#[cfg(not(miri))]
use ucd_parse::GraphemeClusterBreakTest;

use crate::{ext_slice::ByteSlice, tests::LOSSY_TESTS};

use super::*;

#[test]
#[cfg(not(miri))]
fn forward_ucd() {
for (i, test) in ucdtests().into_iter().enumerate() {
let given = test.grapheme_clusters.concat();
@@ -292,6 +294,7 @@ }
}

#[test]
#[cfg(not(miri))]
fn reverse_ucd() {
for (i, test) in ucdtests().into_iter().enumerate() {
let given = test.grapheme_clusters.concat();
@@ -333,15 +336,18 @@ }
}
}

#[cfg(not(miri))]
fn uniescape(s: &str) -> String {
s.chars().flat_map(|c| c.escape_unicode()).collect::<String>()
}

#[cfg(not(miri))]
fn uniescape_vec(strs: &[String]) -> Vec<String> {
strs.iter().map(|s| uniescape(s)).collect()
}

/// Return all of the UCD for grapheme breaks.
#[cfg(not(miri))]
fn ucdtests() -> Vec<GraphemeClusterBreakTest> {
const TESTDATA: &'static str =
include_str!("data/GraphemeBreakTest.txt");
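
For the Unicode segmentation modules, the UCD-driven tests parse and replay thousands of cases from the bundled test data, so they are disabled under Miri outright, and every helper and import used only by those tests carries the same cfg so the module still compiles without unused-item warnings. A self-contained sketch of that arrangement (the data-loading helper is a hypothetical stand-in for the ucd_parse-based ucdtests() in the real code):

#[cfg(test)]
mod tests {
    // Only the disabled tests need this helper, so it gets the same cfg;
    // otherwise Miri builds would warn about an unused function.
    #[cfg(not(miri))]
    fn load_break_cases() -> Vec<String> {
        // Hypothetical stand-in for parsing data/GraphemeBreakTest.txt.
        vec!["a\u{0308}".to_string(), "\u{1F1FA}\u{1F1F8}".to_string()]
    }

    #[test]
    #[cfg(not(miri))]
    fn forward_ucd() {
        for case in load_break_cases() {
            assert!(!case.is_empty());
        }
    }

    // Cheap tests stay enabled so Miri still exercises part of the module.
    #[test]
    fn smoke() {
        assert_eq!("a\u{0308}".chars().count(), 2);
    }
}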
4 changes: 4 additions & 0 deletions src/unicode/sentence.rs
@@ -159,11 +159,13 @@ fn decode_sentence(bs: &[u8]) -> (&str, usize) {

#[cfg(all(test, feature = "std"))]
mod tests {
#[cfg(not(miri))]
use ucd_parse::SentenceBreakTest;

use crate::ext_slice::ByteSlice;

#[test]
#[cfg(not(miri))]
fn forward_ucd() {
for (i, test) in ucdtests().into_iter().enumerate() {
let given = test.sentences.concat();
@@ -199,11 +201,13 @@
bytes.sentences().collect()
}

#[cfg(not(miri))]
fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> {
strs.iter().map(|s| s.as_ref().as_bytes()).collect()
}

/// Return all of the UCD for sentence breaks.
#[cfg(not(miri))]
fn ucdtests() -> Vec<SentenceBreakTest> {
const TESTDATA: &'static str =
include_str!("data/SentenceBreakTest.txt");
4 changes: 4 additions & 0 deletions src/unicode/word.rs
@@ -321,11 +321,13 @@ fn decode_word(bs: &[u8]) -> (&str, usize) {

#[cfg(all(test, feature = "std"))]
mod tests {
#[cfg(not(miri))]
use ucd_parse::WordBreakTest;

use crate::ext_slice::ByteSlice;

#[test]
#[cfg(not(miri))]
fn forward_ucd() {
for (i, test) in ucdtests().into_iter().enumerate() {
let given = test.words.concat();
@@ -395,11 +397,13 @@
bytes.words_with_breaks().collect()
}

#[cfg(not(miri))]
fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> {
strs.iter().map(|s| s.as_ref().as_bytes()).collect()
}

/// Return all of the UCD for word breaks.
#[cfg(not(miri))]
fn ucdtests() -> Vec<WordBreakTest> {
const TESTDATA: &'static str = include_str!("data/WordBreakTest.txt");

1 change: 1 addition & 0 deletions src/utf8.rs
@@ -869,6 +869,7 @@ mod tests {
}

#[test]
#[cfg(not(miri))]
fn validate_all_codepoints() {
for i in 0..(0x10FFFF + 1) {
let cp = match char::from_u32(i) {
