Skip to content

Commit 0ee9b5e

Browse files
committed
tests: make it tractable to run Miri
We make an absolute mess of our tests so that 'cargo miri test' will complete in a reasonable amount of time. I hate this, but Miri is worth it. Ref #121
1 parent 50086e7 commit 0ee9b5e

File tree

8 files changed

+46
-15
lines changed

8 files changed

+46
-15
lines changed

src/ascii.rs

+13-10
Original file line numberDiff line numberDiff line change
@@ -23,37 +23,37 @@ use core::mem;
2323
// means we can effectively skip the _mm_cmpeq_epi8 step and jump straight to
2424
// _mm_movemask_epi8.
2525

26-
#[cfg(any(test, not(target_arch = "x86_64")))]
26+
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
2727
const USIZE_BYTES: usize = mem::size_of::<usize>();
28-
#[cfg(any(test, not(target_arch = "x86_64")))]
28+
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
2929
const FALLBACK_LOOP_SIZE: usize = 2 * USIZE_BYTES;
3030

3131
// This is a mask where the most significant bit of each byte in the usize
3232
// is set. We test this bit to determine whether a character is ASCII or not.
3333
// Namely, a single byte is regarded as an ASCII codepoint if and only if its
3434
// most significant bit is not set.
35-
#[cfg(any(test, not(target_arch = "x86_64")))]
35+
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
3636
const ASCII_MASK_U64: u64 = 0x8080808080808080;
37-
#[cfg(any(test, not(target_arch = "x86_64")))]
37+
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
3838
const ASCII_MASK: usize = ASCII_MASK_U64 as usize;
3939

4040
/// Returns the index of the first non ASCII byte in the given slice.
4141
///
4242
/// If slice only contains ASCII bytes, then the length of the slice is
4343
/// returned.
4444
pub fn first_non_ascii_byte(slice: &[u8]) -> usize {
45-
#[cfg(not(target_arch = "x86_64"))]
45+
#[cfg(any(miri, not(target_arch = "x86_64")))]
4646
{
4747
first_non_ascii_byte_fallback(slice)
4848
}
4949

50-
#[cfg(target_arch = "x86_64")]
50+
#[cfg(all(not(miri), target_arch = "x86_64"))]
5151
{
5252
first_non_ascii_byte_sse2(slice)
5353
}
5454
}
5555

56-
#[cfg(any(test, not(target_arch = "x86_64")))]
56+
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
5757
fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
5858
let align = USIZE_BYTES - 1;
5959
let start_ptr = slice.as_ptr();
@@ -115,7 +115,7 @@ fn first_non_ascii_byte_fallback(slice: &[u8]) -> usize {
115115
}
116116
}
117117

118-
#[cfg(target_arch = "x86_64")]
118+
#[cfg(all(not(miri), target_arch = "x86_64"))]
119119
fn first_non_ascii_byte_sse2(slice: &[u8]) -> usize {
120120
use core::arch::x86_64::*;
121121

@@ -221,7 +221,7 @@ unsafe fn first_non_ascii_byte_slow(
221221
/// bytes is not an ASCII byte.
222222
///
223223
/// The position returned is always in the inclusive range [0, 7].
224-
#[cfg(any(test, not(target_arch = "x86_64")))]
224+
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
225225
fn first_non_ascii_byte_mask(mask: usize) -> usize {
226226
#[cfg(target_endian = "little")]
227227
{
@@ -245,7 +245,7 @@ unsafe fn ptr_sub(ptr: *const u8, amt: usize) -> *const u8 {
245245
ptr.offset((amt as isize).wrapping_neg())
246246
}
247247

248-
#[cfg(any(test, not(target_arch = "x86_64")))]
248+
#[cfg(any(test, miri, not(target_arch = "x86_64")))]
249249
unsafe fn read_unaligned_usize(ptr: *const u8) -> usize {
250250
use core::ptr;
251251

@@ -286,6 +286,7 @@ mod tests {
286286

287287
#[test]
288288
#[cfg(target_arch = "x86_64")]
289+
#[cfg(not(miri))]
289290
fn positive_sse2_forward() {
290291
for i in 0..517 {
291292
let b = "a".repeat(i).into_bytes();
@@ -294,6 +295,7 @@ mod tests {
294295
}
295296

296297
#[test]
298+
#[cfg(not(miri))]
297299
fn negative_fallback_forward() {
298300
for i in 0..517 {
299301
for align in 0..65 {
@@ -315,6 +317,7 @@ mod tests {
315317

316318
#[test]
317319
#[cfg(target_arch = "x86_64")]
320+
#[cfg(not(miri))]
318321
fn negative_sse2_forward() {
319322
for i in 0..517 {
320323
for align in 0..65 {

src/byteset/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ pub(crate) fn rfind_not(haystack: &[u8], byteset: &[u8]) -> Option<usize> {
8080
}
8181
}
8282

83-
#[cfg(all(test, feature = "std"))]
83+
#[cfg(all(test, feature = "std", not(miri)))]
8484
mod tests {
8585
quickcheck::quickcheck! {
8686
fn qc_byteset_forward_matches_naive(

src/byteset/scalar.rs

+13-3
Original file line numberDiff line numberDiff line change
@@ -192,10 +192,15 @@ mod tests {
192192
type TestCase = (Vec<u8>, u8, Option<(usize, usize)>);
193193

194194
fn build_tests() -> Vec<TestCase> {
195+
#[cfg(not(miri))]
196+
const MAX_PER: usize = 515;
197+
#[cfg(miri)]
198+
const MAX_PER: usize = 10;
199+
195200
let mut result = vec![];
196201
for &(search, byte, fwd_pos, rev_pos) in TESTS {
197202
result.push((search.to_vec(), byte, Some((fwd_pos, rev_pos))));
198-
for i in 1..515 {
203+
for i in 1..MAX_PER {
199204
// add a bunch of copies of the search byte to the end.
200205
let mut suffixed: Vec<u8> = search.into();
201206
suffixed.extend(std::iter::repeat(byte).take(i));
@@ -225,7 +230,7 @@ mod tests {
225230
}
226231

227232
// build non-matching tests for several sizes
228-
for i in 0..515 {
233+
for i in 0..MAX_PER {
229234
result.push((
230235
std::iter::repeat(b'\0').take(i).collect(),
231236
b'\0',
@@ -240,6 +245,11 @@ mod tests {
240245
fn test_inv_memchr() {
241246
use crate::{ByteSlice, B};
242247

248+
#[cfg(not(miri))]
249+
const MAX_OFFSET: usize = 130;
250+
#[cfg(miri)]
251+
const MAX_OFFSET: usize = 13;
252+
243253
for (search, byte, matching) in build_tests() {
244254
assert_eq!(
245255
inv_memchr(byte, &search),
@@ -259,7 +269,7 @@ mod tests {
259269
);
260270
// Test a rather large number of offsets for potential alignment
261271
// issues.
262-
for offset in 1..130 {
272+
for offset in 1..MAX_OFFSET {
263273
if offset >= search.len() {
264274
break;
265275
}

src/impls.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -874,7 +874,9 @@ mod bstring_serde {
874874

875875
#[cfg(all(test, feature = "std"))]
876876
mod display {
877-
use crate::{bstring::BString, ByteSlice};
877+
#[cfg(not(miri))]
878+
use crate::bstring::BString;
879+
use crate::ByteSlice;
878880

879881
#[test]
880882
fn clean() {
@@ -972,6 +974,7 @@ mod display {
972974
);
973975
}
974976

977+
#[cfg(not(miri))]
975978
quickcheck::quickcheck! {
976979
fn total_length(bstr: BString) -> bool {
977980
let size = bstr.chars().count();

src/unicode/grapheme.rs

+6
Original file line numberDiff line numberDiff line change
@@ -263,13 +263,15 @@ fn adjust_rev_for_regional_indicator(mut bs: &[u8], i: usize) -> usize {
263263

264264
#[cfg(all(test, feature = "std"))]
265265
mod tests {
266+
#[cfg(not(miri))]
266267
use ucd_parse::GraphemeClusterBreakTest;
267268

268269
use crate::{ext_slice::ByteSlice, tests::LOSSY_TESTS};
269270

270271
use super::*;
271272

272273
#[test]
274+
#[cfg(not(miri))]
273275
fn forward_ucd() {
274276
for (i, test) in ucdtests().into_iter().enumerate() {
275277
let given = test.grapheme_clusters.concat();
@@ -292,6 +294,7 @@ mod tests {
292294
}
293295

294296
#[test]
297+
#[cfg(not(miri))]
295298
fn reverse_ucd() {
296299
for (i, test) in ucdtests().into_iter().enumerate() {
297300
let given = test.grapheme_clusters.concat();
@@ -333,15 +336,18 @@ mod tests {
333336
}
334337
}
335338

339+
#[cfg(not(miri))]
336340
fn uniescape(s: &str) -> String {
337341
s.chars().flat_map(|c| c.escape_unicode()).collect::<String>()
338342
}
339343

344+
#[cfg(not(miri))]
340345
fn uniescape_vec(strs: &[String]) -> Vec<String> {
341346
strs.iter().map(|s| uniescape(s)).collect()
342347
}
343348

344349
/// Return all of the UCD for grapheme breaks.
350+
#[cfg(not(miri))]
345351
fn ucdtests() -> Vec<GraphemeClusterBreakTest> {
346352
const TESTDATA: &'static str =
347353
include_str!("data/GraphemeBreakTest.txt");

src/unicode/sentence.rs

+4
Original file line numberDiff line numberDiff line change
@@ -159,11 +159,13 @@ fn decode_sentence(bs: &[u8]) -> (&str, usize) {
159159

160160
#[cfg(all(test, feature = "std"))]
161161
mod tests {
162+
#[cfg(not(miri))]
162163
use ucd_parse::SentenceBreakTest;
163164

164165
use crate::ext_slice::ByteSlice;
165166

166167
#[test]
168+
#[cfg(not(miri))]
167169
fn forward_ucd() {
168170
for (i, test) in ucdtests().into_iter().enumerate() {
169171
let given = test.sentences.concat();
@@ -199,11 +201,13 @@ mod tests {
199201
bytes.sentences().collect()
200202
}
201203

204+
#[cfg(not(miri))]
202205
fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> {
203206
strs.iter().map(|s| s.as_ref().as_bytes()).collect()
204207
}
205208

206209
/// Return all of the UCD for sentence breaks.
210+
#[cfg(not(miri))]
207211
fn ucdtests() -> Vec<SentenceBreakTest> {
208212
const TESTDATA: &'static str =
209213
include_str!("data/SentenceBreakTest.txt");

src/unicode/word.rs

+4
Original file line numberDiff line numberDiff line change
@@ -321,11 +321,13 @@ fn decode_word(bs: &[u8]) -> (&str, usize) {
321321

322322
#[cfg(all(test, feature = "std"))]
323323
mod tests {
324+
#[cfg(not(miri))]
324325
use ucd_parse::WordBreakTest;
325326

326327
use crate::ext_slice::ByteSlice;
327328

328329
#[test]
330+
#[cfg(not(miri))]
329331
fn forward_ucd() {
330332
for (i, test) in ucdtests().into_iter().enumerate() {
331333
let given = test.words.concat();
@@ -395,11 +397,13 @@ mod tests {
395397
bytes.words_with_breaks().collect()
396398
}
397399

400+
#[cfg(not(miri))]
398401
fn strs_to_bstrs<S: AsRef<str>>(strs: &[S]) -> Vec<&[u8]> {
399402
strs.iter().map(|s| s.as_ref().as_bytes()).collect()
400403
}
401404

402405
/// Return all of the UCD for word breaks.
406+
#[cfg(not(miri))]
403407
fn ucdtests() -> Vec<WordBreakTest> {
404408
const TESTDATA: &'static str = include_str!("data/WordBreakTest.txt");
405409

src/utf8.rs

+1
Original file line numberDiff line numberDiff line change
@@ -869,6 +869,7 @@ mod tests {
869869
}
870870

871871
#[test]
872+
#[cfg(not(miri))]
872873
fn validate_all_codepoints() {
873874
for i in 0..(0x10FFFF + 1) {
874875
let cp = match char::from_u32(i) {

0 commit comments

Comments
 (0)